author    Russell King <rmk+kernel@arm.linux.org.uk>  2010-07-31 09:19:22 -0400
committer Russell King <rmk+kernel@arm.linux.org.uk>  2010-07-31 09:19:22 -0400
commit    f13b1035ce8bbc27d4ce7c281cddd2718f2cf5b0 (patch)
tree      44419bd03c33e662302dd003f57d36e133144f91 /fs
parent    e910b63d009701ad4ebbeb089aba35707fa5d68e (diff)
parent    5da3e714e30d40145f4dd37d79de6bbbcb9e6137 (diff)
Merge branch 'shmobile' into devel
Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/vfs_dir.c               |   2
-rw-r--r--  fs/btrfs/ctree.c              | 129
-rw-r--r--  fs/btrfs/ioctl.c              |  20
-rw-r--r--  fs/ceph/Kconfig               |   2
-rw-r--r--  fs/ceph/auth_x.c              |   3
-rw-r--r--  fs/ceph/caps.c                |  15
-rw-r--r--  fs/ceph/dir.c                 |  13
-rw-r--r--  fs/ceph/file.c                |   2
-rw-r--r--  fs/ceph/inode.c               |   6
-rw-r--r--  fs/ceph/mds_client.c          |  45
-rw-r--r--  fs/ceph/mds_client.h          |   1
-rw-r--r--  fs/ceph/messenger.c           |  71
-rw-r--r--  fs/ceph/mon_client.c          |   6
-rw-r--r--  fs/ceph/osd_client.c          |   6
-rw-r--r--  fs/ceph/osdmap.c              |  27
-rw-r--r--  fs/cifs/cifsfs.c              |   6
-rw-r--r--  fs/cifs/dns_resolve.c         |  69
-rw-r--r--  fs/cifs/dns_resolve.h         |   4
-rw-r--r--  fs/dcache.c                   |   2
-rw-r--r--  fs/ecryptfs/messaging.c       |  17
-rw-r--r--  fs/gfs2/bmap.c                |   1
-rw-r--r--  fs/gfs2/dir.c                 |  33
-rw-r--r--  fs/gfs2/glock.c               |  12
-rw-r--r--  fs/gfs2/inode.c               |  12
-rw-r--r--  fs/gfs2/quota.c               |  10
-rw-r--r--  fs/gfs2/quota.h               |   2
-rw-r--r--  fs/inode.c                    |   2
-rw-r--r--  fs/jbd2/journal.c             |  15
-rw-r--r--  fs/jbd2/transaction.c         |   9
-rw-r--r--  fs/jffs2/xattr.c              |   2
-rw-r--r--  fs/mbcache.c                  |   5
-rw-r--r--  fs/nfs/dir.c                  |   2
-rw-r--r--  fs/nfs/internal.h             |   3
-rw-r--r--  fs/ocfs2/aops.c               |  94
-rw-r--r--  fs/ocfs2/dlm/dlmdomain.c      |   3
-rw-r--r--  fs/ocfs2/dlm/dlmmaster.c      |  22
-rw-r--r--  fs/ocfs2/dlm/dlmrecovery.c    |   2
-rw-r--r--  fs/ocfs2/file.c               | 309
-rw-r--r--  fs/ocfs2/file.h               |   6
-rw-r--r--  fs/ocfs2/journal.c            |  30
-rw-r--r--  fs/ocfs2/localalloc.c         |   7
-rw-r--r--  fs/ocfs2/quota_global.c       |   2
-rw-r--r--  fs/ocfs2/quota_local.c        |   4
-rw-r--r--  fs/ocfs2/refcounttree.c       |  12
-rw-r--r--  fs/ocfs2/suballoc.c           |   2
-rw-r--r--  fs/ocfs2/xattr.c              | 200
-rw-r--r--  fs/partitions/ibm.c           |  14
-rw-r--r--  fs/quota/dquot.c              |   2
-rw-r--r--  fs/sysfs/symlink.c            |  26
-rw-r--r--  fs/ubifs/shrinker.c           |   2
-rw-r--r--  fs/ubifs/ubifs.h              |   2
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c    |   5
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c  |   2
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.c   | 130
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.h   |   2
-rw-r--r--  fs/xfs/linux-2.6/xfs_trace.h  |   3
-rw-r--r--  fs/xfs/quota/xfs_qm.c         |   7
-rw-r--r--  fs/xfs/xfs_mount.h            |   2
58 files changed, 1014 insertions(+), 430 deletions(-)
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index d61e3b28ce37..36d961f342af 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -146,7 +146,7 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	while (rdir->head < rdir->tail) {
 		p9stat_init(&st);
 		err = p9stat_read(rdir->buf + rdir->head,
-				  buflen - rdir->head, &st,
+				  rdir->tail - rdir->head, &st,
 				  fid->clnt->proto_version);
 		if (err) {
 			P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 0d1d966b0fe4..c3df14ce2cc2 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2304,12 +2304,17 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root,
 	return ret;
 }
 
+/*
+ * min slot controls the lowest index we're willing to push to the
+ * right.  We'll push up to and including min_slot, but no lower
+ */
 static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 				      struct btrfs_root *root,
 				      struct btrfs_path *path,
 				      int data_size, int empty,
 				      struct extent_buffer *right,
-				      int free_space, u32 left_nritems)
+				      int free_space, u32 left_nritems,
+				      u32 min_slot)
 {
 	struct extent_buffer *left = path->nodes[0];
 	struct extent_buffer *upper = path->nodes[1];
@@ -2327,7 +2332,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 	if (empty)
 		nr = 0;
 	else
-		nr = 1;
+		nr = max_t(u32, 1, min_slot);
 
 	if (path->slots[0] >= left_nritems)
 		push_space += data_size;
@@ -2469,10 +2474,14 @@ out_unlock:
  *
  * returns 1 if the push failed because the other node didn't have enough
  * room, 0 if everything worked out and < 0 if there were major errors.
+ *
+ * this will push starting from min_slot to the end of the leaf.  It won't
+ * push any slot lower than min_slot
  */
 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
-			   *root, struct btrfs_path *path, int data_size,
-			   int empty)
+			   *root, struct btrfs_path *path,
+			   int min_data_size, int data_size,
+			   int empty, u32 min_slot)
 {
 	struct extent_buffer *left = path->nodes[0];
 	struct extent_buffer *right;
@@ -2514,8 +2523,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
 	if (left_nritems == 0)
 		goto out_unlock;
 
-	return __push_leaf_right(trans, root, path, data_size, empty,
-				 right, free_space, left_nritems);
+	return __push_leaf_right(trans, root, path, min_data_size, empty,
+				 right, free_space, left_nritems, min_slot);
 out_unlock:
 	btrfs_tree_unlock(right);
 	free_extent_buffer(right);
@@ -2525,12 +2534,17 @@ out_unlock:
 /*
  * push some data in the path leaf to the left, trying to free up at
  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
+ *
+ * max_slot can put a limit on how far into the leaf we'll push items.  The
+ * item at 'max_slot' won't be touched.  Use (u32)-1 to make us do all the
+ * items
  */
 static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 				     struct btrfs_root *root,
 				     struct btrfs_path *path, int data_size,
 				     int empty, struct extent_buffer *left,
-				     int free_space, int right_nritems)
+				     int free_space, u32 right_nritems,
+				     u32 max_slot)
 {
 	struct btrfs_disk_key disk_key;
 	struct extent_buffer *right = path->nodes[0];
@@ -2549,9 +2563,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 	slot = path->slots[1];
 
 	if (empty)
-		nr = right_nritems;
+		nr = min(right_nritems, max_slot);
 	else
-		nr = right_nritems - 1;
+		nr = min(right_nritems - 1, max_slot);
 
 	for (i = 0; i < nr; i++) {
 		item = btrfs_item_nr(right, i);
@@ -2712,10 +2726,14 @@ out:
 /*
  * push some data in the path leaf to the left, trying to free up at
  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
+ *
+ * max_slot can put a limit on how far into the leaf we'll push items.  The
+ * item at 'max_slot' won't be touched.  Use (u32)-1 to make us push all the
+ * items
  */
 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
-			  *root, struct btrfs_path *path, int data_size,
-			  int empty)
+			  *root, struct btrfs_path *path, int min_data_size,
+			  int data_size, int empty, u32 max_slot)
 {
 	struct extent_buffer *right = path->nodes[0];
 	struct extent_buffer *left;
@@ -2761,8 +2779,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
 		goto out;
 	}
 
-	return __push_leaf_left(trans, root, path, data_size,
-				empty, left, free_space, right_nritems);
+	return __push_leaf_left(trans, root, path, min_data_size,
+				empty, left, free_space, right_nritems,
+				max_slot);
 out:
 	btrfs_tree_unlock(left);
 	free_extent_buffer(left);
@@ -2855,6 +2874,64 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
 }
 
 /*
+ * double splits happen when we need to insert a big item in the middle
+ * of a leaf.  A double split can leave us with 3 mostly empty leaves:
+ * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
+ *          A                 B                 C
+ *
+ * We avoid this by trying to push the items on either side of our target
+ * into the adjacent leaves.  If all goes well we can avoid the double split
+ * completely.
+ */
+static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
+					  struct btrfs_root *root,
+					  struct btrfs_path *path,
+					  int data_size)
+{
+	int ret;
+	int progress = 0;
+	int slot;
+	u32 nritems;
+
+	slot = path->slots[0];
+
+	/*
+	 * try to push all the items after our slot into the
+	 * right leaf
+	 */
+	ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot);
+	if (ret < 0)
+		return ret;
+
+	if (ret == 0)
+		progress++;
+
+	nritems = btrfs_header_nritems(path->nodes[0]);
+	/*
+	 * our goal is to get our slot at the start or end of a leaf.  If
+	 * we've done so we're done
+	 */
+	if (path->slots[0] == 0 || path->slots[0] == nritems)
+		return 0;
+
+	if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
+		return 0;
+
+	/* try to push all the items before our slot into the next leaf */
+	slot = path->slots[0];
+	ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot);
+	if (ret < 0)
+		return ret;
+
+	if (ret == 0)
+		progress++;
+
+	if (progress)
+		return 0;
+	return 1;
+}
+
+/*
  * split the path's leaf in two, making sure there is at least data_size
  * available for the resulting leaf level of the path.
  *
@@ -2876,6 +2953,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
 	int wret;
 	int split;
 	int num_doubles = 0;
+	int tried_avoid_double = 0;
 
 	l = path->nodes[0];
 	slot = path->slots[0];
@@ -2884,12 +2962,14 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
 		return -EOVERFLOW;
 
 	/* first try to make some room by pushing left and right */
-	if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
-		wret = push_leaf_right(trans, root, path, data_size, 0);
+	if (data_size) {
+		wret = push_leaf_right(trans, root, path, data_size,
+				       data_size, 0, 0);
 		if (wret < 0)
 			return wret;
 		if (wret) {
-			wret = push_leaf_left(trans, root, path, data_size, 0);
+			wret = push_leaf_left(trans, root, path, data_size,
+					      data_size, 0, (u32)-1);
 			if (wret < 0)
 				return wret;
 		}
@@ -2923,6 +3003,8 @@ again:
 		if (mid != nritems &&
 		    leaf_space_used(l, mid, nritems - mid) +
 		    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+			if (data_size && !tried_avoid_double)
+				goto push_for_double;
 			split = 2;
 		}
 	}
@@ -2939,6 +3021,8 @@ again:
 		if (mid != nritems &&
 		    leaf_space_used(l, mid, nritems - mid) +
 		    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+			if (data_size && !tried_avoid_double)
+				goto push_for_double;
 			split = 2;
 		}
 	}
@@ -3019,6 +3103,13 @@ again:
 	}
 
 	return ret;
+
+push_for_double:
+	push_for_double_split(trans, root, path, data_size);
+	tried_avoid_double = 1;
+	if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
+		return 0;
+	goto again;
 }
 
 static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
@@ -3915,13 +4006,15 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 			extent_buffer_get(leaf);
 
 			btrfs_set_path_blocking(path);
-			wret = push_leaf_left(trans, root, path, 1, 1);
+			wret = push_leaf_left(trans, root, path, 1, 1,
+					      1, (u32)-1);
 			if (wret < 0 && wret != -ENOSPC)
 				ret = wret;
 
 			if (path->nodes[0] == leaf &&
 			    btrfs_header_nritems(leaf)) {
-				wret = push_leaf_right(trans, root, path, 1, 1);
+				wret = push_leaf_right(trans, root, path, 1,
+						       1, 1, 0);
 				if (wret < 0 && wret != -ENOSPC)
 					ret = wret;
 			}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4dbaf89b1337..9254b3d58dbe 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1458,7 +1458,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 	 */
 
 	/* the destination must be opened for writing */
-	if (!(file->f_mode & FMODE_WRITE))
+	if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
 		return -EINVAL;
 
 	ret = mnt_want_write(file->f_path.mnt);
@@ -1511,7 +1511,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 
 	/* determine range to clone */
 	ret = -EINVAL;
-	if (off >= src->i_size || off + len > src->i_size)
+	if (off + len > src->i_size || off + len < off)
 		goto out_unlock;
 	if (len == 0)
 		olen = len = src->i_size - off;
@@ -1578,6 +1578,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 		u64 disko = 0, diskl = 0;
 		u64 datao = 0, datal = 0;
 		u8 comp;
+		u64 endoff;
 
 		size = btrfs_item_size_nr(leaf, slot);
 		read_extent_buffer(leaf, buf,
@@ -1712,9 +1713,18 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 		btrfs_release_path(root, path);
 
 		inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-		if (new_key.offset + datal > inode->i_size)
-			btrfs_i_size_write(inode,
-					   new_key.offset + datal);
+
+		/*
+		 * we round up to the block size at eof when
+		 * determining which extents to clone above,
+		 * but shouldn't round up the file size
+		 */
+		endoff = new_key.offset + datal;
+		if (endoff > off+olen)
+			endoff = off+olen;
+		if (endoff > inode->i_size)
+			btrfs_i_size_write(inode, endoff);
+
 		BTRFS_I(inode)->flags = BTRFS_I(src)->flags;
 		ret = btrfs_update_inode(trans, root, inode);
 		BUG_ON(ret);
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index 04b8280582a9..bc87b9c1d27e 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -2,7 +2,7 @@ config CEPH_FS
 	tristate "Ceph distributed file system (EXPERIMENTAL)"
 	depends on INET && EXPERIMENTAL
 	select LIBCRC32C
-	select CONFIG_CRYPTO_AES
+	select CRYPTO_AES
 	help
 	  Choose Y or M here to include support for mounting the
 	  experimental Ceph distributed file system.  Ceph is an extremely
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index 3fe49042d8ad..6d44053ecff1 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -613,6 +613,9 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
 		remove_ticket_handler(ac, th);
 	}
 
+	if (xi->auth_authorizer.buf)
+		ceph_buffer_put(xi->auth_authorizer.buf);
+
 	kfree(ac->private);
 	ac->private = NULL;
 }
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 74144d6389f0..b81be9a56487 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -627,7 +627,7 @@ retry:
 	if (fmode >= 0)
 		__ceph_get_fmode(ci, fmode);
 	spin_unlock(&inode->i_lock);
-	wake_up(&ci->i_cap_wq);
+	wake_up_all(&ci->i_cap_wq);
 	return 0;
 }
 
@@ -1181,7 +1181,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
 	}
 
 	if (wake)
-		wake_up(&ci->i_cap_wq);
+		wake_up_all(&ci->i_cap_wq);
 
 	return delayed;
 }
@@ -2153,7 +2153,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
 	else if (flushsnaps)
 		ceph_flush_snaps(ci);
 	if (wake)
-		wake_up(&ci->i_cap_wq);
+		wake_up_all(&ci->i_cap_wq);
 	if (put)
 		iput(inode);
 }
@@ -2229,7 +2229,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 		iput(inode);
 	} else if (complete_capsnap) {
 		ceph_flush_snaps(ci);
-		wake_up(&ci->i_cap_wq);
+		wake_up_all(&ci->i_cap_wq);
 	}
 	if (drop_capsnap)
 		iput(inode);
@@ -2405,7 +2405,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 	if (queue_invalidate)
 		ceph_queue_invalidate(inode);
 	if (wake)
-		wake_up(&ci->i_cap_wq);
+		wake_up_all(&ci->i_cap_wq);
 
 	if (check_caps == 1)
 		ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
@@ -2460,7 +2460,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
 					struct ceph_inode_info,
 					i_flushing_item)->vfs_inode);
 		mdsc->num_cap_flushing--;
-		wake_up(&mdsc->cap_flushing_wq);
+		wake_up_all(&mdsc->cap_flushing_wq);
 		dout(" inode %p now !flushing\n", inode);
 
 		if (ci->i_dirty_caps == 0) {
@@ -2472,7 +2472,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
 		}
 	}
 	spin_unlock(&mdsc->cap_dirty_lock);
-	wake_up(&ci->i_cap_wq);
+	wake_up_all(&ci->i_cap_wq);
 
 out:
 	spin_unlock(&inode->i_lock);
@@ -2984,6 +2984,7 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
 		memcpy(*p, dentry->d_name.name, dentry->d_name.len);
 		*p += dentry->d_name.len;
 		rel->dname_seq = cpu_to_le32(di->lease_seq);
+		__ceph_mdsc_drop_dentry_lease(dentry);
 	}
 	spin_unlock(&dentry->d_lock);
 	return ret;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index f85719310db2..f94ed3c7f6a5 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -266,6 +266,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	spin_lock(&inode->i_lock);
 	if ((filp->f_pos == 2 || fi->dentry) &&
 	    !ceph_test_opt(client, NOASYNCREADDIR) &&
+	    ceph_snap(inode) != CEPH_SNAPDIR &&
 	    (ci->i_ceph_flags & CEPH_I_COMPLETE) &&
 	    __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
 		err = __dcache_readdir(filp, dirent, filldir);
@@ -1013,18 +1014,22 @@ out_touch:
 
 /*
  * When a dentry is released, clear the dir I_COMPLETE if it was part
- * of the current dir gen.
+ * of the current dir gen or if this is in the snapshot namespace.
  */
 static void ceph_dentry_release(struct dentry *dentry)
 {
 	struct ceph_dentry_info *di = ceph_dentry(dentry);
 	struct inode *parent_inode = dentry->d_parent->d_inode;
+	u64 snapid = ceph_snap(parent_inode);
 
-	if (parent_inode) {
+	dout("dentry_release %p parent %p\n", dentry, parent_inode);
+
+	if (parent_inode && snapid != CEPH_SNAPDIR) {
 		struct ceph_inode_info *ci = ceph_inode(parent_inode);
 
 		spin_lock(&parent_inode->i_lock);
-		if (ci->i_shared_gen == di->lease_shared_gen) {
+		if (ci->i_shared_gen == di->lease_shared_gen ||
+		    snapid <= CEPH_MAXSNAP) {
 			dout(" clearing %p complete (d_release)\n",
 			     parent_inode);
 			ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
@@ -1241,7 +1246,9 @@ struct dentry_operations ceph_dentry_ops = {
 
 struct dentry_operations ceph_snapdir_dentry_ops = {
 	.d_revalidate = ceph_snapdir_d_revalidate,
+	.d_release = ceph_dentry_release,
 };
 
 struct dentry_operations ceph_snap_dentry_ops = {
+	.d_release = ceph_dentry_release,
 };
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 6251a1574b94..7c08698fad3e 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -265,7 +265,7 @@ int ceph_release(struct inode *inode, struct file *file)
 	kmem_cache_free(ceph_file_cachep, cf);
 
 	/* wake up anyone waiting for caps on this inode */
-	wake_up(&ci->i_cap_wq);
+	wake_up_all(&ci->i_cap_wq);
 	return 0;
 }
 
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 8f9b9fe8ef9f..389f9dbd9949 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1199,8 +1199,10 @@ retry_lookup:
 			goto out;
 		}
 		err = ceph_init_dentry(dn);
-		if (err < 0)
+		if (err < 0) {
+			dput(dn);
 			goto out;
+		}
 	} else if (dn->d_inode &&
 		   (ceph_ino(dn->d_inode) != vino.ino ||
 		    ceph_snap(dn->d_inode) != vino.snap)) {
@@ -1499,7 +1501,7 @@ retry:
 	if (wrbuffer_refs == 0)
 		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
 	if (wake)
-		wake_up(&ci->i_cap_wq);
+		wake_up_all(&ci->i_cap_wq);
 }
 
 
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 3ab79f6c4ce8..dd440bd438a9 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -868,7 +868,7 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
 
-	wake_up(&ci->i_cap_wq);
+	wake_up_all(&ci->i_cap_wq);
 	if (arg) {
 		spin_lock(&inode->i_lock);
 		ci->i_wanted_max_size = 0;
@@ -1514,6 +1514,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
 	ceph_encode_filepath(&p, end, ino1, path1);
 	ceph_encode_filepath(&p, end, ino2, path2);
 
+	/* make note of release offset, in case we need to replay */
+	req->r_request_release_offset = p - msg->front.iov_base;
+
 	/* cap releases */
 	releases = 0;
 	if (req->r_inode_drop)
@@ -1561,7 +1564,7 @@ static void complete_request(struct ceph_mds_client *mdsc,
 	if (req->r_callback)
 		req->r_callback(mdsc, req);
 	else
-		complete(&req->r_completion);
+		complete_all(&req->r_completion);
 }
 
 /*
@@ -1580,6 +1583,32 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
 	dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req,
 	     req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts);
 
+	if (req->r_got_unsafe) {
+		/*
+		 * Replay.  Do not regenerate message (and rebuild
+		 * paths, etc.); just use the original message.
+		 * Rebuilding paths will break for renames because
+		 * d_move mangles the src name.
+		 */
+		msg = req->r_request;
+		rhead = msg->front.iov_base;
+
+		flags = le32_to_cpu(rhead->flags);
+		flags |= CEPH_MDS_FLAG_REPLAY;
+		rhead->flags = cpu_to_le32(flags);
+
+		if (req->r_target_inode)
+			rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
+
+		rhead->num_retry = req->r_attempts - 1;
+
+		/* remove cap/dentry releases from message */
+		rhead->num_releases = 0;
+		msg->hdr.front_len = cpu_to_le32(req->r_request_release_offset);
+		msg->front.iov_len = req->r_request_release_offset;
+		return 0;
+	}
+
 	if (req->r_request) {
 		ceph_msg_put(req->r_request);
 		req->r_request = NULL;
@@ -1601,13 +1630,9 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
 	rhead->flags = cpu_to_le32(flags);
 	rhead->num_fwd = req->r_num_fwd;
 	rhead->num_retry = req->r_attempts - 1;
+	rhead->ino = 0;
 
 	dout(" r_locked_dir = %p\n", req->r_locked_dir);
-
-	if (req->r_target_inode && req->r_got_unsafe)
-		rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
-	else
-		rhead->ino = 0;
 	return 0;
 }
 
@@ -1907,7 +1932,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 	if (head->safe) {
 		req->r_got_safe = true;
 		__unregister_request(mdsc, req);
-		complete(&req->r_safe_completion);
+		complete_all(&req->r_safe_completion);
 
 		if (req->r_got_unsafe) {
 			/*
@@ -1922,7 +1947,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 
 			/* last unsafe request during umount? */
 			if (mdsc->stopping && !__get_oldest_req(mdsc))
-				complete(&mdsc->safe_umount_waiters);
+				complete_all(&mdsc->safe_umount_waiters);
 			mutex_unlock(&mdsc->mutex);
 			goto out;
 		}
@@ -2101,7 +2126,7 @@ static void handle_session(struct ceph_mds_session *session,
 		pr_info("mds%d reconnect denied\n", session->s_mds);
 		remove_session_caps(session);
 		wake = 1; /* for good measure */
-		complete(&mdsc->session_close_waiters);
+		complete_all(&mdsc->session_close_waiters);
 		kick_requests(mdsc, mds);
 		break;
 
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index b292fa42a66d..952410c60d09 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -188,6 +188,7 @@ struct ceph_mds_request {
 	int r_old_inode_drop, r_old_inode_unless;
 
 	struct ceph_msg  *r_request;  /* original request */
+	int r_request_release_offset;
 	struct ceph_msg  *r_reply;
 	struct ceph_mds_reply_info_parsed r_reply_info;
 	int r_err;
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 9ad43a310a41..15167b2daa55 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -43,7 +43,8 @@ static void ceph_fault(struct ceph_connection *con);
  * nicely render a sockaddr as a string.
  */
 #define MAX_ADDR_STR 20
-static char addr_str[MAX_ADDR_STR][40];
+#define MAX_ADDR_STR_LEN 60
+static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN];
 static DEFINE_SPINLOCK(addr_str_lock);
 static int last_addr_str;
 
@@ -52,7 +53,6 @@ const char *pr_addr(const struct sockaddr_storage *ss)
 	int i;
 	char *s;
 	struct sockaddr_in *in4 = (void *)ss;
-	unsigned char *quad = (void *)&in4->sin_addr.s_addr;
 	struct sockaddr_in6 *in6 = (void *)ss;
 
 	spin_lock(&addr_str_lock);
@@ -64,25 +64,13 @@ const char *pr_addr(const struct sockaddr_storage *ss)
 
 	switch (ss->ss_family) {
 	case AF_INET:
-		sprintf(s, "%u.%u.%u.%u:%u",
-			(unsigned int)quad[0],
-			(unsigned int)quad[1],
-			(unsigned int)quad[2],
-			(unsigned int)quad[3],
-			(unsigned int)ntohs(in4->sin_port));
+		snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%u", &in4->sin_addr,
+			 (unsigned int)ntohs(in4->sin_port));
 		break;
 
 	case AF_INET6:
-		sprintf(s, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%u",
-			in6->sin6_addr.s6_addr16[0],
-			in6->sin6_addr.s6_addr16[1],
-			in6->sin6_addr.s6_addr16[2],
-			in6->sin6_addr.s6_addr16[3],
-			in6->sin6_addr.s6_addr16[4],
-			in6->sin6_addr.s6_addr16[5],
-			in6->sin6_addr.s6_addr16[6],
-			in6->sin6_addr.s6_addr16[7],
-			(unsigned int)ntohs(in6->sin6_port));
+		snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%u", &in6->sin6_addr,
+			 (unsigned int)ntohs(in6->sin6_port));
 		break;
 
 	default:
@@ -215,12 +203,13 @@ static void set_sock_callbacks(struct socket *sock,
  */
 static struct socket *ceph_tcp_connect(struct ceph_connection *con)
 {
-	struct sockaddr *paddr = (struct sockaddr *)&con->peer_addr.in_addr;
+	struct sockaddr_storage *paddr = &con->peer_addr.in_addr;
 	struct socket *sock;
 	int ret;
 
 	BUG_ON(con->sock);
-	ret = sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
+	ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM,
+			       IPPROTO_TCP, &sock);
 	if (ret)
 		return ERR_PTR(ret);
 	con->sock = sock;
@@ -234,7 +223,8 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con)
 
 	dout("connect %s\n", pr_addr(&con->peer_addr.in_addr));
 
-	ret = sock->ops->connect(sock, paddr, sizeof(*paddr), O_NONBLOCK);
+	ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
+				 O_NONBLOCK);
 	if (ret == -EINPROGRESS) {
 		dout("connect %s EINPROGRESS sk_state = %u\n",
 		     pr_addr(&con->peer_addr.in_addr),
@@ -1009,19 +999,32 @@ int ceph_parse_ips(const char *c, const char *end,
 		struct sockaddr_in *in4 = (void *)ss;
 		struct sockaddr_in6 *in6 = (void *)ss;
 		int port;
+		char delim = ',';
+
+		if (*p == '[') {
+			delim = ']';
+			p++;
+		}
 
 		memset(ss, 0, sizeof(*ss));
 		if (in4_pton(p, end - p, (u8 *)&in4->sin_addr.s_addr,
-			     ',', &ipend)) {
+			     delim, &ipend))
 			ss->ss_family = AF_INET;
-		} else if (in6_pton(p, end - p, (u8 *)&in6->sin6_addr.s6_addr,
-				    ',', &ipend)) {
+		else if (in6_pton(p, end - p, (u8 *)&in6->sin6_addr.s6_addr,
+				  delim, &ipend))
 			ss->ss_family = AF_INET6;
-		} else {
+		else
 			goto bad;
-		}
 		p = ipend;
 
+		if (delim == ']') {
+			if (*p != ']') {
+				dout("missing matching ']'\n");
+				goto bad;
+			}
+			p++;
+		}
+
 		/* port? */
 		if (p < end && *p == ':') {
 			port = 0;
@@ -1055,7 +1058,7 @@ int ceph_parse_ips(const char *c, const char *end,
 	return 0;
 
 bad:
-	pr_err("parse_ips bad ip '%s'\n", c);
+	pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c);
 	return -EINVAL;
 }
 
@@ -2015,20 +2018,20 @@ void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg)
 {
 	mutex_lock(&con->mutex);
 	if (!list_empty(&msg->list_head)) {
-		dout("con_revoke %p msg %p\n", con, msg);
+		dout("con_revoke %p msg %p - was on queue\n", con, msg);
 		list_del_init(&msg->list_head);
 		ceph_msg_put(msg);
 		msg->hdr.seq = 0;
-		if (con->out_msg == msg) {
-			ceph_msg_put(con->out_msg);
-			con->out_msg = NULL;
-		}
+	}
+	if (con->out_msg == msg) {
+		dout("con_revoke %p msg %p - was sending\n", con, msg);
+		con->out_msg = NULL;
 		if (con->out_kvec_is_msg) {
 			con->out_skip = con->out_kvec_bytes;
 			con->out_kvec_is_msg = false;
 		}
-	} else {
-		dout("con_revoke %p msg %p - not queued (sent?)\n", con, msg);
+		ceph_msg_put(msg);
+		msg->hdr.seq = 0;
 	}
 	mutex_unlock(&con->mutex);
 }
diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c
index cc115eafae11..54fe01c50706 100644
--- a/fs/ceph/mon_client.c
+++ b/fs/ceph/mon_client.c
@@ -345,7 +345,7 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
 
 out:
 	mutex_unlock(&monc->mutex);
-	wake_up(&client->auth_wq);
+	wake_up_all(&client->auth_wq);
 }
 
 /*
@@ -462,7 +462,7 @@ static void handle_statfs_reply(struct ceph_mon_client *monc,
 	}
 	mutex_unlock(&monc->mutex);
 	if (req) {
-		complete(&req->completion);
+		complete_all(&req->completion);
 		put_generic_request(req);
 	}
 	return;
@@ -718,7 +718,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
 			     monc->m_auth->front_max);
 	if (ret < 0) {
 		monc->client->auth_err = ret;
-		wake_up(&monc->client->auth_wq);
+		wake_up_all(&monc->client->auth_wq);
 	} else if (ret > 0) {
 		__send_prepared_auth_request(monc, ret);
 	} else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) {
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index 92b7251a53f1..e38522347898 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -862,12 +862,12 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
 	if (req->r_callback)
 		req->r_callback(req, msg);
 	else
-		complete(&req->r_completion);
+		complete_all(&req->r_completion);
 
 	if (flags & CEPH_OSD_FLAG_ONDISK) {
 		if (req->r_safe_callback)
 			req->r_safe_callback(req, msg);
-		complete(&req->r_safe_completion);  /* fsync waiter */
+		complete_all(&req->r_safe_completion);  /* fsync waiter */
 	}
 
 done:
@@ -1083,7 +1083,7 @@ done:
 	if (newmap)
 		kick_requests(osdc, NULL);
 	up_read(&osdc->map_sem);
-	wake_up(&osdc->client->auth_wq);
+	wake_up_all(&osdc->client->auth_wq);
 	return;
 
 bad:
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index 50ce64ebd330..416d46adbf87 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -568,6 +568,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
 		if (ev > CEPH_PG_POOL_VERSION) {
 			pr_warning("got unknown v %d > %d of ceph_pg_pool\n",
 				   ev, CEPH_PG_POOL_VERSION);
+			kfree(pi);
 			goto bad;
 		}
 		__decode_pool(p, pi);
@@ -830,12 +831,13 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 		/* remove any? */
 		while (rbp && pgid_cmp(rb_entry(rbp, struct ceph_pg_mapping,
 						node)->pgid, pgid) <= 0) {
-			struct rb_node *cur = rbp;
+			struct ceph_pg_mapping *cur =
+				rb_entry(rbp, struct ceph_pg_mapping, node);
+
 			rbp = rb_next(rbp);
-			dout(" removed pg_temp %llx\n",
-			     *(u64 *)&rb_entry(cur, struct ceph_pg_mapping,
-					       node)->pgid);
-			rb_erase(cur, &map->pg_temp);
+			dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid);
+			rb_erase(&cur->node, &map->pg_temp);
+			kfree(cur);
 		}
 
 		if (pglen) {
@@ -851,19 +853,22 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 			for (j = 0; j < pglen; j++)
 				pg->osds[j] = ceph_decode_32(p);
 			err = __insert_pg_mapping(pg, &map->pg_temp);
-			if (err)
+			if (err) {
+				kfree(pg);
 				goto bad;
+			}
 			dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid,
 			     pglen);
 		}
 	}
 	while (rbp) {
-		struct rb_node *cur = rbp;
+		struct ceph_pg_mapping *cur =
+			rb_entry(rbp, struct ceph_pg_mapping, node);
+
 		rbp = rb_next(rbp);
-		dout(" removed pg_temp %llx\n",
-		     *(u64 *)&rb_entry(cur, struct ceph_pg_mapping,
-				       node)->pgid);
-		rb_erase(cur, &map->pg_temp);
+		dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid);
+		rb_erase(&cur->node, &map->pg_temp);
+		kfree(cur);
 	}
 
 	/* ignore the rest */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 484e52bb40bb..2cb1a70214d7 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -923,7 +923,7 @@ init_cifs(void)
 		goto out_unregister_filesystem;
 #endif
 #ifdef CONFIG_CIFS_DFS_UPCALL
-	rc = register_key_type(&key_type_dns_resolver);
+	rc = cifs_init_dns_resolver();
 	if (rc)
 		goto out_unregister_key_type;
 #endif
@@ -935,7 +935,7 @@ init_cifs(void)
 
  out_unregister_resolver_key:
 #ifdef CONFIG_CIFS_DFS_UPCALL
-	unregister_key_type(&key_type_dns_resolver);
+	cifs_exit_dns_resolver();
  out_unregister_key_type:
 #endif
 #ifdef CONFIG_CIFS_UPCALL
@@ -961,7 +961,7 @@ exit_cifs(void)
 	cifs_proc_clean();
 #ifdef CONFIG_CIFS_DFS_UPCALL
 	cifs_dfs_release_automount_timer();
-	unregister_key_type(&key_type_dns_resolver);
+	cifs_exit_dns_resolver();
 #endif
 #ifdef CONFIG_CIFS_UPCALL
 	unregister_key_type(&cifs_spnego_key_type);
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index 4db2c5e7283f..49315cbf742d 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -24,12 +24,16 @@
  */
 
 #include <linux/slab.h>
+#include <linux/keyctl.h>
+#include <linux/key-type.h>
 #include <keys/user-type.h>
 #include "dns_resolve.h"
 #include "cifsglob.h"
 #include "cifsproto.h"
 #include "cifs_debug.h"
 
+static const struct cred *dns_resolver_cache;
+
 /* Checks if supplied name is IP address
  * returns:
  *     1 - name is IP
@@ -94,6 +98,7 @@ struct key_type key_type_dns_resolver = {
 int
 dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
 {
+	const struct cred *saved_cred;
 	int rc = -EAGAIN;
 	struct key *rkey = ERR_PTR(-EAGAIN);
 	char *name;
@@ -133,8 +138,15 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
 		goto skip_upcall;
 	}
 
+	saved_cred = override_creds(dns_resolver_cache);
 	rkey = request_key(&key_type_dns_resolver, name, "");
+	revert_creds(saved_cred);
 	if (!IS_ERR(rkey)) {
+		if (!(rkey->perm & KEY_USR_VIEW)) {
+			down_read(&rkey->sem);
+			rkey->perm |= KEY_USR_VIEW;
+			up_read(&rkey->sem);
+		}
 		len = rkey->type_data.x[0];
 		data = rkey->payload.data;
 	} else {
@@ -165,4 +177,61 @@ out:
 	return rc;
 }
 
+int __init cifs_init_dns_resolver(void)
+{
+	struct cred *cred;
+	struct key *keyring;
+	int ret;
+
+	printk(KERN_NOTICE "Registering the %s key type\n",
+	       key_type_dns_resolver.name);
+
+	/* create an override credential set with a special thread keyring in
+	 * which DNS requests are cached
+	 *
+	 * this is used to prevent malicious redirections from being installed
+	 * with add_key().
+	 */
+	cred = prepare_kernel_cred(NULL);
+	if (!cred)
+		return -ENOMEM;
+
+	keyring = key_alloc(&key_type_keyring, ".dns_resolver", 0, 0, cred,
+			    (KEY_POS_ALL & ~KEY_POS_SETATTR) |
+			    KEY_USR_VIEW | KEY_USR_READ,
+			    KEY_ALLOC_NOT_IN_QUOTA);
+	if (IS_ERR(keyring)) {
+		ret = PTR_ERR(keyring);
+		goto failed_put_cred;
+	}
+
+	ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL);
+	if (ret < 0)
+		goto failed_put_key;
+
+	ret = register_key_type(&key_type_dns_resolver);
+	if (ret < 0)
+		goto failed_put_key;
+
+	/* instruct request_key() to use this special keyring as a cache for
+	 * the results it looks up */
+	cred->thread_keyring = keyring;
+	cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
+	dns_resolver_cache = cred;
+	return 0;
+
+failed_put_key:
+	key_put(keyring);
+failed_put_cred:
+	put_cred(cred);
+	return ret;
+}
 
+void __exit cifs_exit_dns_resolver(void)
+{
+	key_revoke(dns_resolver_cache->thread_keyring);
+	unregister_key_type(&key_type_dns_resolver);
+	put_cred(dns_resolver_cache);
+	printk(KERN_NOTICE "Unregistered %s key type\n",
+	       key_type_dns_resolver.name);
+}
diff --git a/fs/cifs/dns_resolve.h b/fs/cifs/dns_resolve.h
index 966e9288930b..26b9eaa9f5ee 100644
--- a/fs/cifs/dns_resolve.h
+++ b/fs/cifs/dns_resolve.h
@@ -24,8 +24,8 @@
 #define _DNS_RESOLVE_H
 
 #ifdef __KERNEL__
-#include <linux/key-type.h>
-extern struct key_type key_type_dns_resolver;
+extern int __init cifs_init_dns_resolver(void);
+extern void __exit cifs_exit_dns_resolver(void);
 extern int dns_resolve_server_name_to_ip(const char *unc, char **ip_addr);
 #endif /* KERNEL */
 
diff --git a/fs/dcache.c b/fs/dcache.c
index c8c78ba07827..86d4db15473e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -896,7 +896,7 @@ EXPORT_SYMBOL(shrink_dcache_parent);
  *
  * In this case we return -1 to tell the caller that we baled.
  */
-static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
+static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 {
 	if (nr) {
 		if (!(gfp_mask & __GFP_FS))
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 2d8dbce9d485..46c4dd8dfcc3 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -31,9 +31,9 @@ static struct mutex ecryptfs_msg_ctx_lists_mux;
 
 static struct hlist_head *ecryptfs_daemon_hash;
 struct mutex ecryptfs_daemon_hash_mux;
-static int ecryptfs_hash_buckets;
+static int ecryptfs_hash_bits;
 #define ecryptfs_uid_hash(uid) \
-	hash_long((unsigned long)uid, ecryptfs_hash_buckets)
+	hash_long((unsigned long)uid, ecryptfs_hash_bits)
 
 static u32 ecryptfs_msg_counter;
 static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr;
@@ -486,18 +486,19 @@ int ecryptfs_init_messaging(void)
 	}
 	mutex_init(&ecryptfs_daemon_hash_mux);
 	mutex_lock(&ecryptfs_daemon_hash_mux);
-	ecryptfs_hash_buckets = 1;
-	while (ecryptfs_number_of_users >> ecryptfs_hash_buckets)
-		ecryptfs_hash_buckets++;
+	ecryptfs_hash_bits = 1;
+	while (ecryptfs_number_of_users >> ecryptfs_hash_bits)
+		ecryptfs_hash_bits++;
 	ecryptfs_daemon_hash = kmalloc((sizeof(struct hlist_head)
-					* ecryptfs_hash_buckets), GFP_KERNEL);
+					* (1 << ecryptfs_hash_bits)),
+				       GFP_KERNEL);
 	if (!ecryptfs_daemon_hash) {
 		rc = -ENOMEM;
 		printk(KERN_ERR "%s: Failed to allocate memory\n", __func__);
 		mutex_unlock(&ecryptfs_daemon_hash_mux);
 		goto out;
 	}
-	for (i = 0; i < ecryptfs_hash_buckets; i++)
+	for (i = 0; i < (1 << ecryptfs_hash_bits); i++)
 		INIT_HLIST_HEAD(&ecryptfs_daemon_hash[i]);
 	mutex_unlock(&ecryptfs_daemon_hash_mux);
 	ecryptfs_msg_ctx_arr = kmalloc((sizeof(struct ecryptfs_msg_ctx)
@@ -554,7 +555,7 @@ void ecryptfs_release_messaging(void)
 	int i;
 
 	mutex_lock(&ecryptfs_daemon_hash_mux);
-	for (i = 0; i < ecryptfs_hash_buckets; i++) {
+	for (i = 0; i < (1 << ecryptfs_hash_bits); i++) {
 		int rc;
 
 		hlist_for_each_entry(daemon, elem,
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 4a48c0f4b402..84da64b551b2 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1041,6 +1041,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
 
 	if (gfs2_is_stuffed(ip)) {
 		u64 dsize = size + sizeof(struct gfs2_inode);
+		ip->i_disksize = size;
 		ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 8295c5b5d4a9..6b48d7c268b2 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -392,7 +392,7 @@ static int gfs2_dirent_find_space(const struct gfs2_dirent *dent,
 	unsigned totlen = be16_to_cpu(dent->de_rec_len);
 
 	if (gfs2_dirent_sentinel(dent))
-		actual = GFS2_DIRENT_SIZE(0);
+		actual = 0;
 	if (totlen - actual >= required)
 		return 1;
 	return 0;
@@ -1231,6 +1231,25 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
 	return 0;
 }
 
+static void *gfs2_alloc_sort_buffer(unsigned size)
+{
+	void *ptr = NULL;
+
+	if (size < KMALLOC_MAX_SIZE)
+		ptr = kmalloc(size, GFP_NOFS | __GFP_NOWARN);
+	if (!ptr)
+		ptr = __vmalloc(size, GFP_NOFS, PAGE_KERNEL);
+	return ptr;
+}
+
+static void gfs2_free_sort_buffer(void *ptr)
+{
+	if (is_vmalloc_addr(ptr))
+		vfree(ptr);
+	else
+		kfree(ptr);
+}
+
 static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
 			      filldir_t filldir, int *copied, unsigned *depth,
 			      u64 leaf_no)
@@ -1271,7 +1290,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
 	 * 99 is the maximum number of entries that can fit in a single
 	 * leaf block.
 	 */
-	larr = vmalloc((leaves + entries + 99) * sizeof(void *));
+	larr = gfs2_alloc_sort_buffer((leaves + entries + 99) * sizeof(void *));
 	if (!larr)
 		goto out;
 	darr = (const struct gfs2_dirent **)(larr + leaves);
@@ -1282,7 +1301,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
 	do {
 		error = get_leaf(ip, lfn, &bh);
 		if (error)
-			goto out_kfree;
+			goto out_free;
 		lf = (struct gfs2_leaf *)bh->b_data;
 		lfn = be64_to_cpu(lf->lf_next);
 		if (lf->lf_entries) {
@@ -1291,7 +1310,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
 					   gfs2_dirent_gather, NULL, &g);
 			error = PTR_ERR(dent);
 			if (IS_ERR(dent))
-				goto out_kfree;
+				goto out_free;
 			if (entries2 != g.offset) {
 				fs_warn(sdp, "Number of entries corrupt in dir "
 					"leaf %llu, entries2 (%u) != "
@@ -1300,7 +1319,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
 					entries2, g.offset);
 
 				error = -EIO;
-				goto out_kfree;
+				goto out_free;
 			}
 			error = 0;
 			larr[leaf++] = bh;
@@ -1312,10 +1331,10 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
 	BUG_ON(entries2 != entries);
 	error = do_filldir_main(ip, offset, opaque, filldir, darr,
 				entries, copied);
-out_kfree:
+out_free:
 	for(i = 0; i < leaf; i++)
 		brelse(larr[i]);
-	vfree(larr);
+	gfs2_free_sort_buffer(larr);
 out:
 	return error;
 }
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index ddcdbf493536..0898f3ec8212 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -706,8 +706,18 @@ static void glock_work_func(struct work_struct *work)
 {
 	unsigned long delay = 0;
 	struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
+	struct gfs2_holder *gh;
 	int drop_ref = 0;
 
+	if (unlikely(test_bit(GLF_FROZEN, &gl->gl_flags))) {
+		spin_lock(&gl->gl_spin);
+		gh = find_first_waiter(gl);
+		if (gh && (gh->gh_flags & LM_FLAG_NOEXP) &&
+		    test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))
+			set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
+		spin_unlock(&gl->gl_spin);
+	}
+
 	if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) {
 		finish_xmote(gl, gl->gl_reply);
 		drop_ref = 1;
@@ -1348,7 +1358,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
1348} 1358}
1349 1359
1350 1360
1351static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask) 1361static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
1352{ 1362{
1353 struct gfs2_glock *gl; 1363 struct gfs2_glock *gl;
1354 int may_demote; 1364 int may_demote;
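The extra struct shrinker * parameter added to gfs2_shrink_glock_memory() here recurs for gfs2_shrink_qd_memory(), shrink_icache_memory(), mb_cache_shrink_fn() and nfs_access_cache_shrinker() below: the callback now receives the shrinker that fired. A minimal sketch of the updated registration pattern, assuming the 2.6.35-era API (all names hypothetical):

#include <linux/mm.h>

static atomic_t example_count = ATOMIC_INIT(0);

static int example_shrink(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
	if (nr) {
		/* try to free up to nr cached objects here */
	}
	/* report how many reclaimable objects remain */
	return atomic_read(&example_count);
}

static struct shrinker example_shrinker = {
	.shrink = example_shrink,
	.seeks = DEFAULT_SEEKS,
};

/* register_shrinker(&example_shrinker) at init,
 * unregister_shrinker(&example_shrinker) at teardown */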
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index b5612cbb62a5..f03afd9c44bc 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -169,7 +169,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb,
169{ 169{
170 struct inode *inode; 170 struct inode *inode;
171 struct gfs2_inode *ip; 171 struct gfs2_inode *ip;
172 struct gfs2_glock *io_gl; 172 struct gfs2_glock *io_gl = NULL;
173 int error; 173 int error;
174 174
175 inode = gfs2_iget(sb, no_addr); 175 inode = gfs2_iget(sb, no_addr);
@@ -198,6 +198,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb,
198 ip->i_iopen_gh.gh_gl->gl_object = ip; 198 ip->i_iopen_gh.gh_gl->gl_object = ip;
199 199
200 gfs2_glock_put(io_gl); 200 gfs2_glock_put(io_gl);
201 io_gl = NULL;
201 202
202 if ((type == DT_UNKNOWN) && (no_formal_ino == 0)) 203 if ((type == DT_UNKNOWN) && (no_formal_ino == 0))
203 goto gfs2_nfsbypass; 204 goto gfs2_nfsbypass;
@@ -228,7 +229,8 @@ gfs2_nfsbypass:
228fail_glock: 229fail_glock:
229 gfs2_glock_dq(&ip->i_iopen_gh); 230 gfs2_glock_dq(&ip->i_iopen_gh);
230fail_iopen: 231fail_iopen:
231 gfs2_glock_put(io_gl); 232 if (io_gl)
233 gfs2_glock_put(io_gl);
232fail_put: 234fail_put:
233 if (inode->i_state & I_NEW) 235 if (inode->i_state & I_NEW)
234 ip->i_gl->gl_object = NULL; 236 ip->i_gl->gl_object = NULL;
@@ -256,7 +258,7 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
256{ 258{
257 struct gfs2_sbd *sdp; 259 struct gfs2_sbd *sdp;
258 struct gfs2_inode *ip; 260 struct gfs2_inode *ip;
259 struct gfs2_glock *io_gl; 261 struct gfs2_glock *io_gl = NULL;
260 int error; 262 int error;
261 struct gfs2_holder gh; 263 struct gfs2_holder gh;
262 struct inode *inode; 264 struct inode *inode;
@@ -293,6 +295,7 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
293 295
294 ip->i_iopen_gh.gh_gl->gl_object = ip; 296 ip->i_iopen_gh.gh_gl->gl_object = ip;
295 gfs2_glock_put(io_gl); 297 gfs2_glock_put(io_gl);
298 io_gl = NULL;
296 299
297 inode->i_mode = DT2IF(DT_UNKNOWN); 300 inode->i_mode = DT2IF(DT_UNKNOWN);
298 301
@@ -319,7 +322,8 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
319fail_glock: 322fail_glock:
320 gfs2_glock_dq(&ip->i_iopen_gh); 323 gfs2_glock_dq(&ip->i_iopen_gh);
321fail_iopen: 324fail_iopen:
322 gfs2_glock_put(io_gl); 325 if (io_gl)
326 gfs2_glock_put(io_gl);
323fail_put: 327fail_put:
324 ip->i_gl->gl_object = NULL; 328 ip->i_gl->gl_object = NULL;
325 gfs2_glock_put(ip->i_gl); 329 gfs2_glock_put(ip->i_gl);
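Both hunks above apply the same double-put fix: initialize io_gl to NULL, clear it as soon as the reference has been dropped, and make the shared error label conditional. Schematically (a sketch of the idiom, not the full function):

	struct gfs2_glock *io_gl = NULL;

	/* ... acquire io_gl ... */
	gfs2_glock_put(io_gl);	/* done with this reference */
	io_gl = NULL;		/* later error paths must not put it again */
	/* ... */
fail_iopen:
	if (io_gl)
		gfs2_glock_put(io_gl);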
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 49667d68769e..8f02d3db8f42 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -77,7 +77,7 @@ static LIST_HEAD(qd_lru_list);
77static atomic_t qd_lru_count = ATOMIC_INIT(0); 77static atomic_t qd_lru_count = ATOMIC_INIT(0);
78static DEFINE_SPINLOCK(qd_lru_lock); 78static DEFINE_SPINLOCK(qd_lru_lock);
79 79
80int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask) 80int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
81{ 81{
82 struct gfs2_quota_data *qd; 82 struct gfs2_quota_data *qd;
83 struct gfs2_sbd *sdp; 83 struct gfs2_sbd *sdp;
@@ -694,10 +694,8 @@ get_a_page:
694 if (!buffer_mapped(bh)) 694 if (!buffer_mapped(bh))
695 goto unlock_out; 695 goto unlock_out;
696 /* If it's a newly allocated disk block for quota, zero it */ 696 /* If it's a newly allocated disk block for quota, zero it */
697 if (buffer_new(bh)) { 697 if (buffer_new(bh))
698 memset(bh->b_data, 0, bh->b_size); 698 zero_user(page, pos - blocksize, bh->b_size);
699 set_buffer_uptodate(bh);
700 }
701 } 699 }
702 700
703 if (PageUptodate(page)) 701 if (PageUptodate(page))
@@ -723,7 +721,7 @@ get_a_page:
723 721
724 /* If quota straddles page boundary, we need to update the rest of the 722 /* If quota straddles page boundary, we need to update the rest of the
725 * quota at the beginning of the next page */ 723 * quota at the beginning of the next page */
726 if (offset != 0) { /* first page, offset is closer to PAGE_CACHE_SIZE */ 724 if ((offset + sizeof(struct gfs2_quota)) > PAGE_CACHE_SIZE) {
727 ptr = ptr + nbytes; 725 ptr = ptr + nbytes;
728 nbytes = sizeof(struct gfs2_quota) - nbytes; 726 nbytes = sizeof(struct gfs2_quota) - nbytes;
729 offset = 0; 727 offset = 0;
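The old boundary test assumed any non-zero offset meant the quota record crossed into the next page; the new test checks whether the record's end actually passes PAGE_CACHE_SIZE. Worked numbers, assuming 4096-byte pages and sizeof(struct gfs2_quota) = 88 (the size is assumed here purely for the arithmetic):

	/* offset = 4072: 4072 + 88 = 4160 > 4096, so the record
	 * straddles the boundary and the remaining nbytes are written
	 * at offset 0 of the next page.
	 * offset = 1024: 1024 + 88 = 1112 <= 4096, so the record fits
	 * in this page even though offset != 0 -- the case the old
	 * test mishandled. */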
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 195f60c8bd14..e7d236ca48bd 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -51,7 +51,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
51 return ret; 51 return ret;
52} 52}
53 53
54extern int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask); 54extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask);
55extern const struct quotactl_ops gfs2_quotactl_ops; 55extern const struct quotactl_ops gfs2_quotactl_ops;
56 56
57#endif /* __QUOTA_DOT_H__ */ 57#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/inode.c b/fs/inode.c
index 2bee20ae3d65..722860b323a9 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -512,7 +512,7 @@ static void prune_icache(int nr_to_scan)
512 * This function is passed the number of inodes to scan, and it returns the 512 * This function is passed the number of inodes to scan, and it returns the
513 * total number of remaining possibly-reclaimable inodes. 513 * total number of remaining possibly-reclaimable inodes.
514 */ 514 */
515static int shrink_icache_memory(int nr, gfp_t gfp_mask) 515static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
516{ 516{
517 if (nr) { 517 if (nr) {
518 /* 518 /*
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index bc2ff5932769..036880895bfc 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -297,7 +297,6 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
297 struct page *new_page; 297 struct page *new_page;
298 unsigned int new_offset; 298 unsigned int new_offset;
299 struct buffer_head *bh_in = jh2bh(jh_in); 299 struct buffer_head *bh_in = jh2bh(jh_in);
300 struct jbd2_buffer_trigger_type *triggers;
301 journal_t *journal = transaction->t_journal; 300 journal_t *journal = transaction->t_journal;
302 301
303 /* 302 /*
@@ -328,21 +327,21 @@ repeat:
328 done_copy_out = 1; 327 done_copy_out = 1;
329 new_page = virt_to_page(jh_in->b_frozen_data); 328 new_page = virt_to_page(jh_in->b_frozen_data);
330 new_offset = offset_in_page(jh_in->b_frozen_data); 329 new_offset = offset_in_page(jh_in->b_frozen_data);
331 triggers = jh_in->b_frozen_triggers;
332 } else { 330 } else {
333 new_page = jh2bh(jh_in)->b_page; 331 new_page = jh2bh(jh_in)->b_page;
334 new_offset = offset_in_page(jh2bh(jh_in)->b_data); 332 new_offset = offset_in_page(jh2bh(jh_in)->b_data);
335 triggers = jh_in->b_triggers;
336 } 333 }
337 334
338 mapped_data = kmap_atomic(new_page, KM_USER0); 335 mapped_data = kmap_atomic(new_page, KM_USER0);
339 /* 336 /*
 340 * Fire any commit trigger. Do this before checking for escaping, 337 * Fire the data-frozen trigger if the data wasn't already frozen.
 341 * as the trigger may modify the magic offset. If a copy-out 338 * Do this before checking for escaping, as the trigger may modify
 342 * happens afterwards, it will have the correct data in the buffer. 339 * the magic offset. If a copy-out happens afterwards, it will have
 340 * the correct data in the buffer.
343 */ 341 */
344 jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset, 342 if (!done_copy_out)
345 triggers); 343 jbd2_buffer_frozen_trigger(jh_in, mapped_data + new_offset,
344 jh_in->b_triggers);
346 345
347 /* 346 /*
348 * Check for escaping 347 * Check for escaping
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index e214d68620ac..b8e0806681bb 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -725,6 +725,9 @@ done:
725 page = jh2bh(jh)->b_page; 725 page = jh2bh(jh)->b_page;
726 offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK; 726 offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
727 source = kmap_atomic(page, KM_USER0); 727 source = kmap_atomic(page, KM_USER0);
728 /* Fire data frozen trigger just before we copy the data */
729 jbd2_buffer_frozen_trigger(jh, source + offset,
730 jh->b_triggers);
728 memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); 731 memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
729 kunmap_atomic(source, KM_USER0); 732 kunmap_atomic(source, KM_USER0);
730 733
@@ -963,15 +966,15 @@ void jbd2_journal_set_triggers(struct buffer_head *bh,
963 jh->b_triggers = type; 966 jh->b_triggers = type;
964} 967}
965 968
966void jbd2_buffer_commit_trigger(struct journal_head *jh, void *mapped_data, 969void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data,
967 struct jbd2_buffer_trigger_type *triggers) 970 struct jbd2_buffer_trigger_type *triggers)
968{ 971{
969 struct buffer_head *bh = jh2bh(jh); 972 struct buffer_head *bh = jh2bh(jh);
970 973
971 if (!triggers || !triggers->t_commit) 974 if (!triggers || !triggers->t_frozen)
972 return; 975 return;
973 976
974 triggers->t_commit(triggers, bh, mapped_data, bh->b_size); 977 triggers->t_frozen(triggers, bh, mapped_data, bh->b_size);
975} 978}
976 979
977void jbd2_buffer_abort_trigger(struct journal_head *jh, 980void jbd2_buffer_abort_trigger(struct journal_head *jh,
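The t_commit-to-t_frozen rename matches when the callback now fires: at the moment the buffer contents are frozen for the committing transaction, either during the copy-out in do_get_write_access() (previous hunk) or, if no copy-out happened, in jbd2_journal_write_metadata_buffer(). A minimal sketch of a trigger client (the checksum idea is hypothetical; ocfs2's real triggers appear later in this diff):

#include <linux/jbd2.h>

static void example_frozen(struct jbd2_buffer_trigger_type *triggers,
			   struct buffer_head *bh, void *mapped_data,
			   size_t size)
{
	/* recompute a block checksum in mapped_data just before the
	 * frozen copy goes to the journal */
}

static struct jbd2_buffer_trigger_type example_triggers = {
	.t_frozen = example_frozen,
};

/* after jbd2_journal_get_write_access(handle, bh):
 *	jbd2_journal_set_triggers(bh, &example_triggers); */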
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index a2d58c96f1b4..d258e261bdc7 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -626,7 +626,7 @@ void jffs2_xattr_free_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *i
626 626
627static int check_xattr_ref_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic) 627static int check_xattr_ref_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
628{ 628{
 629 /* success of check_xattr_ref_inode() means taht inode (ic) dose not have 629 /* success of check_xattr_ref_inode() means that inode (ic) does not have
 630 * duplicate name/value pairs. If a duplicate name/value pair is found, 630 * duplicate name/value pairs. If a duplicate name/value pair is found,
631 * one will be removed. 631 * one will be removed.
632 */ 632 */
diff --git a/fs/mbcache.c b/fs/mbcache.c
index ec88ff3d04a9..e28f21b95344 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -115,7 +115,7 @@ mb_cache_indexes(struct mb_cache *cache)
 115 * What the mbcache registers in order to get shrunk dynamically. 115 * What the mbcache registers in order to get shrunk dynamically.
116 */ 116 */
117 117
118static int mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask); 118static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
119 119
120static struct shrinker mb_cache_shrinker = { 120static struct shrinker mb_cache_shrinker = {
121 .shrink = mb_cache_shrink_fn, 121 .shrink = mb_cache_shrink_fn,
@@ -191,13 +191,14 @@ forget:
191 * This function is called by the kernel memory management when memory 191 * This function is called by the kernel memory management when memory
192 * gets low. 192 * gets low.
193 * 193 *
194 * @shrink: (ignored)
194 * @nr_to_scan: Number of objects to scan 195 * @nr_to_scan: Number of objects to scan
195 * @gfp_mask: (ignored) 196 * @gfp_mask: (ignored)
196 * 197 *
197 * Returns the number of objects which are present in the cache. 198 * Returns the number of objects which are present in the cache.
198 */ 199 */
199static int 200static int
200mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask) 201mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
201{ 202{
202 LIST_HEAD(free_list); 203 LIST_HEAD(free_list);
203 struct list_head *l, *ltmp; 204 struct list_head *l, *ltmp;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 782b431ef91c..e60416d3f818 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1710,7 +1710,7 @@ static void nfs_access_free_list(struct list_head *head)
1710 } 1710 }
1711} 1711}
1712 1712
1713int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) 1713int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
1714{ 1714{
1715 LIST_HEAD(head); 1715 LIST_HEAD(head);
1716 struct nfs_inode *nfsi; 1716 struct nfs_inode *nfsi;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index d8bd619e386c..e70f44b9b3f4 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -205,7 +205,8 @@ extern struct rpc_procinfo nfs4_procedures[];
205void nfs_close_context(struct nfs_open_context *ctx, int is_sync); 205void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
206 206
207/* dir.c */ 207/* dir.c */
208extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask); 208extern int nfs_access_cache_shrinker(struct shrinker *shrink,
209 int nr_to_scan, gfp_t gfp_mask);
209 210
210/* inode.c */ 211/* inode.c */
211extern struct workqueue_struct *nfsiod_workqueue; 212extern struct workqueue_struct *nfsiod_workqueue;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 3623ca20cc18..356e976772bf 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -196,15 +196,14 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock,
196 dump_stack(); 196 dump_stack();
197 goto bail; 197 goto bail;
198 } 198 }
199
200 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
201 mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
202 (unsigned long long)past_eof);
203
204 if (create && (iblock >= past_eof))
205 set_buffer_new(bh_result);
206 } 199 }
207 200
201 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
202 mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
203 (unsigned long long)past_eof);
204 if (create && (iblock >= past_eof))
205 set_buffer_new(bh_result);
206
208bail: 207bail:
209 if (err < 0) 208 if (err < 0)
210 err = -EIO; 209 err = -EIO;
@@ -459,36 +458,6 @@ int walk_page_buffers( handle_t *handle,
459 return ret; 458 return ret;
460} 459}
461 460
462handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
463 struct page *page,
464 unsigned from,
465 unsigned to)
466{
467 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
468 handle_t *handle;
469 int ret = 0;
470
471 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
472 if (IS_ERR(handle)) {
473 ret = -ENOMEM;
474 mlog_errno(ret);
475 goto out;
476 }
477
478 if (ocfs2_should_order_data(inode)) {
479 ret = ocfs2_jbd2_file_inode(handle, inode);
480 if (ret < 0)
481 mlog_errno(ret);
482 }
483out:
484 if (ret) {
485 if (!IS_ERR(handle))
486 ocfs2_commit_trans(osb, handle);
487 handle = ERR_PTR(ret);
488 }
489 return handle;
490}
491
492static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) 461static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
493{ 462{
494 sector_t status; 463 sector_t status;
@@ -1131,23 +1100,37 @@ out:
1131 */ 1100 */
1132static int ocfs2_grab_pages_for_write(struct address_space *mapping, 1101static int ocfs2_grab_pages_for_write(struct address_space *mapping,
1133 struct ocfs2_write_ctxt *wc, 1102 struct ocfs2_write_ctxt *wc,
1134 u32 cpos, loff_t user_pos, int new, 1103 u32 cpos, loff_t user_pos,
1104 unsigned user_len, int new,
1135 struct page *mmap_page) 1105 struct page *mmap_page)
1136{ 1106{
1137 int ret = 0, i; 1107 int ret = 0, i;
1138 unsigned long start, target_index, index; 1108 unsigned long start, target_index, end_index, index;
1139 struct inode *inode = mapping->host; 1109 struct inode *inode = mapping->host;
1110 loff_t last_byte;
1140 1111
1141 target_index = user_pos >> PAGE_CACHE_SHIFT; 1112 target_index = user_pos >> PAGE_CACHE_SHIFT;
1142 1113
1143 /* 1114 /*
1144 * Figure out how many pages we'll be manipulating here. For 1115 * Figure out how many pages we'll be manipulating here. For
1145 * non allocating write, we just change the one 1116 * non allocating write, we just change the one
 1146 * page. Otherwise, we'll need a whole clusters worth. 1117 * page. Otherwise, we'll need a whole cluster's worth. If we're
1118 * writing past i_size, we only need enough pages to cover the
1119 * last page of the write.
1147 */ 1120 */
1148 if (new) { 1121 if (new) {
1149 wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb); 1122 wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb);
1150 start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos); 1123 start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos);
1124 /*
1125 * We need the index *past* the last page we could possibly
1126 * touch. This is the page past the end of the write or
1127 * i_size, whichever is greater.
1128 */
1129 last_byte = max(user_pos + user_len, i_size_read(inode));
1130 BUG_ON(last_byte < 1);
1131 end_index = ((last_byte - 1) >> PAGE_CACHE_SHIFT) + 1;
1132 if ((start + wc->w_num_pages) > end_index)
1133 wc->w_num_pages = end_index - start;
1151 } else { 1134 } else {
1152 wc->w_num_pages = 1; 1135 wc->w_num_pages = 1;
1153 start = target_index; 1136 start = target_index;
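The end_index clipping above is easiest to check with concrete numbers; a worked example assuming 4096-byte pages (PAGE_CACHE_SHIFT = 12):

	/* user_pos = 10000, user_len = 500, i_size = 9000:
	 *   last_byte = max(10000 + 500, 9000) = 10500
	 *   end_index = ((10500 - 1) >> 12) + 1 = 2 + 1 = 3
	 * so the write can touch pages 0..2 only, and w_num_pages is
	 * clipped to end_index - start instead of grabbing every page
	 * of the cluster past the end of the write. */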
@@ -1620,21 +1603,20 @@ out:
 1620 * write path can treat it as a non-allocating write, which has no 1603 * write path can treat it as a non-allocating write, which has no
1621 * special case code for sparse/nonsparse files. 1604 * special case code for sparse/nonsparse files.
1622 */ 1605 */
1623static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos, 1606static int ocfs2_expand_nonsparse_inode(struct inode *inode,
1624 unsigned len, 1607 struct buffer_head *di_bh,
1608 loff_t pos, unsigned len,
1625 struct ocfs2_write_ctxt *wc) 1609 struct ocfs2_write_ctxt *wc)
1626{ 1610{
1627 int ret; 1611 int ret;
1628 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1629 loff_t newsize = pos + len; 1612 loff_t newsize = pos + len;
1630 1613
1631 if (ocfs2_sparse_alloc(osb)) 1614 BUG_ON(ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)));
1632 return 0;
1633 1615
1634 if (newsize <= i_size_read(inode)) 1616 if (newsize <= i_size_read(inode))
1635 return 0; 1617 return 0;
1636 1618
1637 ret = ocfs2_extend_no_holes(inode, newsize, pos); 1619 ret = ocfs2_extend_no_holes(inode, di_bh, newsize, pos);
1638 if (ret) 1620 if (ret)
1639 mlog_errno(ret); 1621 mlog_errno(ret);
1640 1622
@@ -1644,6 +1626,18 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
1644 return ret; 1626 return ret;
1645} 1627}
1646 1628
1629static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
1630 loff_t pos)
1631{
1632 int ret = 0;
1633
1634 BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)));
1635 if (pos > i_size_read(inode))
1636 ret = ocfs2_zero_extend(inode, di_bh, pos);
1637
1638 return ret;
1639}
1640
1647int ocfs2_write_begin_nolock(struct address_space *mapping, 1641int ocfs2_write_begin_nolock(struct address_space *mapping,
1648 loff_t pos, unsigned len, unsigned flags, 1642 loff_t pos, unsigned len, unsigned flags,
1649 struct page **pagep, void **fsdata, 1643 struct page **pagep, void **fsdata,
@@ -1679,7 +1673,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1679 } 1673 }
1680 } 1674 }
1681 1675
1682 ret = ocfs2_expand_nonsparse_inode(inode, pos, len, wc); 1676 if (ocfs2_sparse_alloc(osb))
1677 ret = ocfs2_zero_tail(inode, di_bh, pos);
1678 else
1679 ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, len,
1680 wc);
1683 if (ret) { 1681 if (ret) {
1684 mlog_errno(ret); 1682 mlog_errno(ret);
1685 goto out; 1683 goto out;
@@ -1789,7 +1787,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1789 * that we can zero and flush if we error after adding the 1787 * that we can zero and flush if we error after adding the
1790 * extent. 1788 * extent.
1791 */ 1789 */
1792 ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, 1790 ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
1793 cluster_of_pages, mmap_page); 1791 cluster_of_pages, mmap_page);
1794 if (ret) { 1792 if (ret) {
1795 mlog_errno(ret); 1793 mlog_errno(ret);
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 6b5a492e1749..153abb5abef0 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1671,7 +1671,7 @@ struct dlm_ctxt * dlm_register_domain(const char *domain,
1671 struct dlm_ctxt *dlm = NULL; 1671 struct dlm_ctxt *dlm = NULL;
1672 struct dlm_ctxt *new_ctxt = NULL; 1672 struct dlm_ctxt *new_ctxt = NULL;
1673 1673
1674 if (strlen(domain) > O2NM_MAX_NAME_LEN) { 1674 if (strlen(domain) >= O2NM_MAX_NAME_LEN) {
1675 ret = -ENAMETOOLONG; 1675 ret = -ENAMETOOLONG;
1676 mlog(ML_ERROR, "domain name length too long\n"); 1676 mlog(ML_ERROR, "domain name length too long\n");
1677 goto leave; 1677 goto leave;
@@ -1709,6 +1709,7 @@ retry:
1709 } 1709 }
1710 1710
1711 if (dlm_protocol_compare(&dlm->fs_locking_proto, fs_proto)) { 1711 if (dlm_protocol_compare(&dlm->fs_locking_proto, fs_proto)) {
1712 spin_unlock(&dlm_domain_lock);
1712 mlog(ML_ERROR, 1713 mlog(ML_ERROR,
1713 "Requested locking protocol version is not " 1714 "Requested locking protocol version is not "
1714 "compatible with already registered domain " 1715 "compatible with already registered domain "
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 4a7506a4e314..94b97fc6a88e 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2808,14 +2808,8 @@ again:
2808 mlog(0, "trying again...\n"); 2808 mlog(0, "trying again...\n");
2809 goto again; 2809 goto again;
2810 } 2810 }
2811 /* now that we are sure the MIGRATING state is there, drop
2812 * the unneded state which blocked threads trying to DIRTY */
2813 spin_lock(&res->spinlock);
2814 BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
2815 BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
2816 res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
2817 spin_unlock(&res->spinlock);
2818 2811
2812 ret = 0;
2819 /* did the target go down or die? */ 2813 /* did the target go down or die? */
2820 spin_lock(&dlm->spinlock); 2814 spin_lock(&dlm->spinlock);
2821 if (!test_bit(target, dlm->domain_map)) { 2815 if (!test_bit(target, dlm->domain_map)) {
@@ -2826,9 +2820,21 @@ again:
2826 spin_unlock(&dlm->spinlock); 2820 spin_unlock(&dlm->spinlock);
2827 2821
2828 /* 2822 /*
2823 * if target is down, we need to clear DLM_LOCK_RES_BLOCK_DIRTY for
2824 * another try; otherwise, we are sure the MIGRATING state is there,
 2825 * drop the unneeded state which blocked threads trying to DIRTY
2826 */
2827 spin_lock(&res->spinlock);
2828 BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
2829 res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
2830 if (!ret)
2831 BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
2832 spin_unlock(&res->spinlock);
2833
2834 /*
2829 * at this point: 2835 * at this point:
2830 * 2836 *
2831 * o the DLM_LOCK_RES_MIGRATING flag is set 2837 * o the DLM_LOCK_RES_MIGRATING flag is set if target not down
2832 * o there are no pending asts on this lockres 2838 * o there are no pending asts on this lockres
2833 * o all processes trying to reserve an ast on this 2839 * o all processes trying to reserve an ast on this
2834 * lockres must wait for the MIGRATING flag to clear 2840 * lockres must wait for the MIGRATING flag to clear
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index f8b75ce4be70..9dfaac73b36d 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -463,7 +463,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
463 if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) { 463 if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
464 int bit; 464 int bit;
465 465
466 bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES+1, 0); 466 bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES, 0);
467 if (bit >= O2NM_MAX_NODES || bit < 0) 467 if (bit >= O2NM_MAX_NODES || bit < 0)
468 dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM); 468 dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
469 else 469 else
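find_next_bit()'s second argument is the bitmap size in bits, exclusive, so the old O2NM_MAX_NODES + 1 scanned one bit past the end of recovery_map. A minimal illustration of the bounds semantics (a standalone sketch, not dlm code):

	#include <linux/bitmap.h>
	#include <linux/bitops.h>

	static void bounds_example(void)
	{
		DECLARE_BITMAP(map, 8);		/* room for bits 0..7 */
		unsigned long bit;

		bitmap_zero(map, 8);
		set_bit(3, map);

		/* scans bits [0, 8); returns 3 here, 8 if none set */
		bit = find_next_bit(map, 8, 0);
		(void)bit;

		/* find_next_bit(map, 9, 0) would read one bit past the
		 * declared bitmap -- the class of bug fixed above */
	}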
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6a13ea64c447..2b10b36d1577 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -724,28 +724,55 @@ leave:
724 return status; 724 return status;
725} 725}
726 726
727/*
728 * While a write will already be ordering the data, a truncate will not.
729 * Thus, we need to explicitly order the zeroed pages.
730 */
731static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
732{
733 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
734 handle_t *handle = NULL;
735 int ret = 0;
736
737 if (!ocfs2_should_order_data(inode))
738 goto out;
739
740 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
741 if (IS_ERR(handle)) {
742 ret = -ENOMEM;
743 mlog_errno(ret);
744 goto out;
745 }
746
747 ret = ocfs2_jbd2_file_inode(handle, inode);
748 if (ret < 0)
749 mlog_errno(ret);
750
751out:
752 if (ret) {
753 if (!IS_ERR(handle))
754 ocfs2_commit_trans(osb, handle);
755 handle = ERR_PTR(ret);
756 }
757 return handle;
758}
759
 727/* Some parts of this were taken from generic_cont_expand, which turned out 760/* Some parts of this were taken from generic_cont_expand, which turned out
728 * to be too fragile to do exactly what we need without us having to 761 * to be too fragile to do exactly what we need without us having to
729 * worry about recursive locking in ->write_begin() and ->write_end(). */ 762 * worry about recursive locking in ->write_begin() and ->write_end(). */
730static int ocfs2_write_zero_page(struct inode *inode, 763static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
731 u64 size) 764 u64 abs_to)
732{ 765{
733 struct address_space *mapping = inode->i_mapping; 766 struct address_space *mapping = inode->i_mapping;
734 struct page *page; 767 struct page *page;
735 unsigned long index; 768 unsigned long index = abs_from >> PAGE_CACHE_SHIFT;
736 unsigned int offset;
737 handle_t *handle = NULL; 769 handle_t *handle = NULL;
738 int ret; 770 int ret = 0;
771 unsigned zero_from, zero_to, block_start, block_end;
739 772
740 offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */ 773 BUG_ON(abs_from >= abs_to);
741 /* ugh. in prepare/commit_write, if from==to==start of block, we 774 BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
742 ** skip the prepare. make sure we never send an offset for the start 775 BUG_ON(abs_from & (inode->i_blkbits - 1));
743 ** of a block
744 */
745 if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
746 offset++;
747 }
748 index = size >> PAGE_CACHE_SHIFT;
749 776
750 page = grab_cache_page(mapping, index); 777 page = grab_cache_page(mapping, index);
751 if (!page) { 778 if (!page) {
@@ -754,31 +781,56 @@ static int ocfs2_write_zero_page(struct inode *inode,
754 goto out; 781 goto out;
755 } 782 }
756 783
757 ret = ocfs2_prepare_write_nolock(inode, page, offset, offset); 784 /* Get the offsets within the page that we want to zero */
758 if (ret < 0) { 785 zero_from = abs_from & (PAGE_CACHE_SIZE - 1);
759 mlog_errno(ret); 786 zero_to = abs_to & (PAGE_CACHE_SIZE - 1);
760 goto out_unlock; 787 if (!zero_to)
761 } 788 zero_to = PAGE_CACHE_SIZE;
762 789
763 if (ocfs2_should_order_data(inode)) { 790 mlog(0,
764 handle = ocfs2_start_walk_page_trans(inode, page, offset, 791 "abs_from = %llu, abs_to = %llu, index = %lu, zero_from = %u, zero_to = %u\n",
765 offset); 792 (unsigned long long)abs_from, (unsigned long long)abs_to,
766 if (IS_ERR(handle)) { 793 index, zero_from, zero_to);
767 ret = PTR_ERR(handle); 794
768 handle = NULL; 795 /* We know that zero_from is block aligned */
796 for (block_start = zero_from; block_start < zero_to;
797 block_start = block_end) {
798 block_end = block_start + (1 << inode->i_blkbits);
799
800 /*
801 * block_start is block-aligned. Bump it by one to
802 * force ocfs2_{prepare,commit}_write() to zero the
803 * whole block.
804 */
805 ret = ocfs2_prepare_write_nolock(inode, page,
806 block_start + 1,
807 block_start + 1);
808 if (ret < 0) {
809 mlog_errno(ret);
769 goto out_unlock; 810 goto out_unlock;
770 } 811 }
771 }
772 812
773 /* must not update i_size! */ 813 if (!handle) {
774 ret = block_commit_write(page, offset, offset); 814 handle = ocfs2_zero_start_ordered_transaction(inode);
775 if (ret < 0) 815 if (IS_ERR(handle)) {
776 mlog_errno(ret); 816 ret = PTR_ERR(handle);
777 else 817 handle = NULL;
778 ret = 0; 818 break;
819 }
820 }
821
822 /* must not update i_size! */
823 ret = block_commit_write(page, block_start + 1,
824 block_start + 1);
825 if (ret < 0)
826 mlog_errno(ret);
827 else
828 ret = 0;
829 }
779 830
780 if (handle) 831 if (handle)
781 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 832 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
833
782out_unlock: 834out_unlock:
783 unlock_page(page); 835 unlock_page(page);
784 page_cache_release(page); 836 page_cache_release(page);
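A worked example of the per-page offsets above, assuming 4096-byte pages and 512-byte blocks (values chosen for illustration):

	/* abs_from = 9216, abs_to = 12288:
	 *   index     = 9216 >> 12   = 2
	 *   zero_from = 9216 & 4095  = 1024
	 *   zero_to   = 12288 & 4095 = 0 -> PAGE_CACHE_SIZE (4096)
	 * so bytes 1024..4095 of page 2 are zeroed one 512-byte block
	 * at a time, with block_start + 1 forcing
	 * ocfs2_prepare_write_nolock() to zero the whole block. */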
@@ -786,22 +838,114 @@ out:
786 return ret; 838 return ret;
787} 839}
788 840
789static int ocfs2_zero_extend(struct inode *inode, 841/*
790 u64 zero_to_size) 842 * Find the next range to zero. We do this in terms of bytes because
843 * that's what ocfs2_zero_extend() wants, and it is dealing with the
844 * pagecache. We may return multiple extents.
845 *
 846 * zero_start and zero_end are ocfs2_zero_extend()'s current idea of what
847 * needs to be zeroed. range_start and range_end return the next zeroing
848 * range. A subsequent call should pass the previous range_end as its
849 * zero_start. If range_end is 0, there's nothing to do.
850 *
 851 * Unwritten extents are skipped over. Refcounted extents are CoW'd.
852 */
853static int ocfs2_zero_extend_get_range(struct inode *inode,
854 struct buffer_head *di_bh,
855 u64 zero_start, u64 zero_end,
856 u64 *range_start, u64 *range_end)
791{ 857{
792 int ret = 0; 858 int rc = 0, needs_cow = 0;
793 u64 start_off; 859 u32 p_cpos, zero_clusters = 0;
794 struct super_block *sb = inode->i_sb; 860 u32 zero_cpos =
861 zero_start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
862 u32 last_cpos = ocfs2_clusters_for_bytes(inode->i_sb, zero_end);
863 unsigned int num_clusters = 0;
864 unsigned int ext_flags = 0;
795 865
796 start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode)); 866 while (zero_cpos < last_cpos) {
797 while (start_off < zero_to_size) { 867 rc = ocfs2_get_clusters(inode, zero_cpos, &p_cpos,
798 ret = ocfs2_write_zero_page(inode, start_off); 868 &num_clusters, &ext_flags);
799 if (ret < 0) { 869 if (rc) {
800 mlog_errno(ret); 870 mlog_errno(rc);
871 goto out;
872 }
873
874 if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
875 zero_clusters = num_clusters;
876 if (ext_flags & OCFS2_EXT_REFCOUNTED)
877 needs_cow = 1;
878 break;
879 }
880
881 zero_cpos += num_clusters;
882 }
883 if (!zero_clusters) {
884 *range_end = 0;
885 goto out;
886 }
887
888 while ((zero_cpos + zero_clusters) < last_cpos) {
889 rc = ocfs2_get_clusters(inode, zero_cpos + zero_clusters,
890 &p_cpos, &num_clusters,
891 &ext_flags);
892 if (rc) {
893 mlog_errno(rc);
801 goto out; 894 goto out;
802 } 895 }
803 896
804 start_off += sb->s_blocksize; 897 if (!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN))
898 break;
899 if (ext_flags & OCFS2_EXT_REFCOUNTED)
900 needs_cow = 1;
901 zero_clusters += num_clusters;
902 }
903 if ((zero_cpos + zero_clusters) > last_cpos)
904 zero_clusters = last_cpos - zero_cpos;
905
906 if (needs_cow) {
907 rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters,
908 UINT_MAX);
909 if (rc) {
910 mlog_errno(rc);
911 goto out;
912 }
913 }
914
915 *range_start = ocfs2_clusters_to_bytes(inode->i_sb, zero_cpos);
916 *range_end = ocfs2_clusters_to_bytes(inode->i_sb,
917 zero_cpos + zero_clusters);
918
919out:
920 return rc;
921}
922
923/*
924 * Zero one range returned from ocfs2_zero_extend_get_range(). The caller
925 * has made sure that the entire range needs zeroing.
926 */
927static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
928 u64 range_end)
929{
930 int rc = 0;
931 u64 next_pos;
932 u64 zero_pos = range_start;
933
934 mlog(0, "range_start = %llu, range_end = %llu\n",
935 (unsigned long long)range_start,
936 (unsigned long long)range_end);
937 BUG_ON(range_start >= range_end);
938
939 while (zero_pos < range_end) {
940 next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
941 if (next_pos > range_end)
942 next_pos = range_end;
943 rc = ocfs2_write_zero_page(inode, zero_pos, next_pos);
944 if (rc < 0) {
945 mlog_errno(rc);
946 break;
947 }
948 zero_pos = next_pos;
805 949
806 /* 950 /*
807 * Very large extends have the potential to lock up 951 * Very large extends have the potential to lock up
@@ -810,16 +954,63 @@ static int ocfs2_zero_extend(struct inode *inode,
810 cond_resched(); 954 cond_resched();
811 } 955 }
812 956
813out: 957 return rc;
958}
959
960int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
961 loff_t zero_to_size)
962{
963 int ret = 0;
964 u64 zero_start, range_start = 0, range_end = 0;
965 struct super_block *sb = inode->i_sb;
966
967 zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
968 mlog(0, "zero_start %llu for i_size %llu\n",
969 (unsigned long long)zero_start,
970 (unsigned long long)i_size_read(inode));
971 while (zero_start < zero_to_size) {
972 ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start,
973 zero_to_size,
974 &range_start,
975 &range_end);
976 if (ret) {
977 mlog_errno(ret);
978 break;
979 }
980 if (!range_end)
981 break;
982 /* Trim the ends */
983 if (range_start < zero_start)
984 range_start = zero_start;
985 if (range_end > zero_to_size)
986 range_end = zero_to_size;
987
988 ret = ocfs2_zero_extend_range(inode, range_start,
989 range_end);
990 if (ret) {
991 mlog_errno(ret);
992 break;
993 }
994 zero_start = range_end;
995 }
996
814 return ret; 997 return ret;
815} 998}
816 999
817int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to) 1000int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
1001 u64 new_i_size, u64 zero_to)
818{ 1002{
819 int ret; 1003 int ret;
820 u32 clusters_to_add; 1004 u32 clusters_to_add;
821 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1005 struct ocfs2_inode_info *oi = OCFS2_I(inode);
822 1006
1007 /*
1008 * Only quota files call this without a bh, and they can't be
1009 * refcounted.
1010 */
1011 BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
1012 BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE));
1013
823 clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size); 1014 clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
824 if (clusters_to_add < oi->ip_clusters) 1015 if (clusters_to_add < oi->ip_clusters)
825 clusters_to_add = 0; 1016 clusters_to_add = 0;
@@ -840,7 +1031,7 @@ int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
840 * still need to zero the area between the old i_size and the 1031 * still need to zero the area between the old i_size and the
841 * new i_size. 1032 * new i_size.
842 */ 1033 */
843 ret = ocfs2_zero_extend(inode, zero_to); 1034 ret = ocfs2_zero_extend(inode, di_bh, zero_to);
844 if (ret < 0) 1035 if (ret < 0)
845 mlog_errno(ret); 1036 mlog_errno(ret);
846 1037
@@ -862,27 +1053,15 @@ static int ocfs2_extend_file(struct inode *inode,
862 goto out; 1053 goto out;
863 1054
864 if (i_size_read(inode) == new_i_size) 1055 if (i_size_read(inode) == new_i_size)
865 goto out; 1056 goto out;
866 BUG_ON(new_i_size < i_size_read(inode)); 1057 BUG_ON(new_i_size < i_size_read(inode));
867 1058
868 /* 1059 /*
869 * Fall through for converting inline data, even if the fs
870 * supports sparse files.
871 *
872 * The check for inline data here is legal - nobody can add
873 * the feature since we have i_mutex. We must check it again
874 * after acquiring ip_alloc_sem though, as paths like mmap
875 * might have raced us to converting the inode to extents.
876 */
877 if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
878 && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
879 goto out_update_size;
880
881 /*
882 * The alloc sem blocks people in read/write from reading our 1060 * The alloc sem blocks people in read/write from reading our
883 * allocation until we're done changing it. We depend on 1061 * allocation until we're done changing it. We depend on
884 * i_mutex to block other extend/truncate calls while we're 1062 * i_mutex to block other extend/truncate calls while we're
885 * here. 1063 * here. We even have to hold it for sparse files because there
1064 * might be some tail zeroing.
886 */ 1065 */
887 down_write(&oi->ip_alloc_sem); 1066 down_write(&oi->ip_alloc_sem);
888 1067
@@ -899,14 +1078,16 @@ static int ocfs2_extend_file(struct inode *inode,
899 ret = ocfs2_convert_inline_data_to_extents(inode, di_bh); 1078 ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
900 if (ret) { 1079 if (ret) {
901 up_write(&oi->ip_alloc_sem); 1080 up_write(&oi->ip_alloc_sem);
902
903 mlog_errno(ret); 1081 mlog_errno(ret);
904 goto out; 1082 goto out;
905 } 1083 }
906 } 1084 }
907 1085
908 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) 1086 if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
909 ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size); 1087 ret = ocfs2_zero_extend(inode, di_bh, new_i_size);
1088 else
1089 ret = ocfs2_extend_no_holes(inode, di_bh, new_i_size,
1090 new_i_size);
910 1091
911 up_write(&oi->ip_alloc_sem); 1092 up_write(&oi->ip_alloc_sem);
912 1093
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index d66cf4f7c70e..97bf761c9e7c 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -54,8 +54,10 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
54int ocfs2_simple_size_update(struct inode *inode, 54int ocfs2_simple_size_update(struct inode *inode,
55 struct buffer_head *di_bh, 55 struct buffer_head *di_bh,
56 u64 new_i_size); 56 u64 new_i_size);
57int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, 57int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
58 u64 zero_to); 58 u64 new_i_size, u64 zero_to);
59int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
60 loff_t zero_to);
59int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); 61int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
60int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, 62int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
61 struct kstat *stat); 63 struct kstat *stat);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 47878cf16418..625de9d7088c 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -472,7 +472,7 @@ static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger
472 return container_of(triggers, struct ocfs2_triggers, ot_triggers); 472 return container_of(triggers, struct ocfs2_triggers, ot_triggers);
473} 473}
474 474
475static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers, 475static void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
476 struct buffer_head *bh, 476 struct buffer_head *bh,
477 void *data, size_t size) 477 void *data, size_t size)
478{ 478{
@@ -491,7 +491,7 @@ static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
491 * Quota blocks have their own trigger because the struct ocfs2_block_check 491 * Quota blocks have their own trigger because the struct ocfs2_block_check
492 * offset depends on the blocksize. 492 * offset depends on the blocksize.
493 */ 493 */
494static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers, 494static void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
495 struct buffer_head *bh, 495 struct buffer_head *bh,
496 void *data, size_t size) 496 void *data, size_t size)
497{ 497{
@@ -511,7 +511,7 @@ static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
511 * Directory blocks also have their own trigger because the 511 * Directory blocks also have their own trigger because the
512 * struct ocfs2_block_check offset depends on the blocksize. 512 * struct ocfs2_block_check offset depends on the blocksize.
513 */ 513 */
514static void ocfs2_db_commit_trigger(struct jbd2_buffer_trigger_type *triggers, 514static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
515 struct buffer_head *bh, 515 struct buffer_head *bh,
516 void *data, size_t size) 516 void *data, size_t size)
517{ 517{
@@ -544,7 +544,7 @@ static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
544 544
545static struct ocfs2_triggers di_triggers = { 545static struct ocfs2_triggers di_triggers = {
546 .ot_triggers = { 546 .ot_triggers = {
547 .t_commit = ocfs2_commit_trigger, 547 .t_frozen = ocfs2_frozen_trigger,
548 .t_abort = ocfs2_abort_trigger, 548 .t_abort = ocfs2_abort_trigger,
549 }, 549 },
550 .ot_offset = offsetof(struct ocfs2_dinode, i_check), 550 .ot_offset = offsetof(struct ocfs2_dinode, i_check),
@@ -552,7 +552,7 @@ static struct ocfs2_triggers di_triggers = {
552 552
553static struct ocfs2_triggers eb_triggers = { 553static struct ocfs2_triggers eb_triggers = {
554 .ot_triggers = { 554 .ot_triggers = {
555 .t_commit = ocfs2_commit_trigger, 555 .t_frozen = ocfs2_frozen_trigger,
556 .t_abort = ocfs2_abort_trigger, 556 .t_abort = ocfs2_abort_trigger,
557 }, 557 },
558 .ot_offset = offsetof(struct ocfs2_extent_block, h_check), 558 .ot_offset = offsetof(struct ocfs2_extent_block, h_check),
@@ -560,7 +560,7 @@ static struct ocfs2_triggers eb_triggers = {
560 560
561static struct ocfs2_triggers rb_triggers = { 561static struct ocfs2_triggers rb_triggers = {
562 .ot_triggers = { 562 .ot_triggers = {
563 .t_commit = ocfs2_commit_trigger, 563 .t_frozen = ocfs2_frozen_trigger,
564 .t_abort = ocfs2_abort_trigger, 564 .t_abort = ocfs2_abort_trigger,
565 }, 565 },
566 .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check), 566 .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check),
@@ -568,7 +568,7 @@ static struct ocfs2_triggers rb_triggers = {
568 568
569static struct ocfs2_triggers gd_triggers = { 569static struct ocfs2_triggers gd_triggers = {
570 .ot_triggers = { 570 .ot_triggers = {
571 .t_commit = ocfs2_commit_trigger, 571 .t_frozen = ocfs2_frozen_trigger,
572 .t_abort = ocfs2_abort_trigger, 572 .t_abort = ocfs2_abort_trigger,
573 }, 573 },
574 .ot_offset = offsetof(struct ocfs2_group_desc, bg_check), 574 .ot_offset = offsetof(struct ocfs2_group_desc, bg_check),
@@ -576,14 +576,14 @@ static struct ocfs2_triggers gd_triggers = {
576 576
577static struct ocfs2_triggers db_triggers = { 577static struct ocfs2_triggers db_triggers = {
578 .ot_triggers = { 578 .ot_triggers = {
579 .t_commit = ocfs2_db_commit_trigger, 579 .t_frozen = ocfs2_db_frozen_trigger,
580 .t_abort = ocfs2_abort_trigger, 580 .t_abort = ocfs2_abort_trigger,
581 }, 581 },
582}; 582};
583 583
584static struct ocfs2_triggers xb_triggers = { 584static struct ocfs2_triggers xb_triggers = {
585 .ot_triggers = { 585 .ot_triggers = {
586 .t_commit = ocfs2_commit_trigger, 586 .t_frozen = ocfs2_frozen_trigger,
587 .t_abort = ocfs2_abort_trigger, 587 .t_abort = ocfs2_abort_trigger,
588 }, 588 },
589 .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check), 589 .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check),
@@ -591,14 +591,14 @@ static struct ocfs2_triggers xb_triggers = {
591 591
592static struct ocfs2_triggers dq_triggers = { 592static struct ocfs2_triggers dq_triggers = {
593 .ot_triggers = { 593 .ot_triggers = {
594 .t_commit = ocfs2_dq_commit_trigger, 594 .t_frozen = ocfs2_dq_frozen_trigger,
595 .t_abort = ocfs2_abort_trigger, 595 .t_abort = ocfs2_abort_trigger,
596 }, 596 },
597}; 597};
598 598
599static struct ocfs2_triggers dr_triggers = { 599static struct ocfs2_triggers dr_triggers = {
600 .ot_triggers = { 600 .ot_triggers = {
601 .t_commit = ocfs2_commit_trigger, 601 .t_frozen = ocfs2_frozen_trigger,
602 .t_abort = ocfs2_abort_trigger, 602 .t_abort = ocfs2_abort_trigger,
603 }, 603 },
604 .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check), 604 .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check),
@@ -606,7 +606,7 @@ static struct ocfs2_triggers dr_triggers = {
606 606
607static struct ocfs2_triggers dl_triggers = { 607static struct ocfs2_triggers dl_triggers = {
608 .ot_triggers = { 608 .ot_triggers = {
609 .t_commit = ocfs2_commit_trigger, 609 .t_frozen = ocfs2_frozen_trigger,
610 .t_abort = ocfs2_abort_trigger, 610 .t_abort = ocfs2_abort_trigger,
611 }, 611 },
612 .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check), 612 .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check),
@@ -1936,7 +1936,7 @@ void ocfs2_orphan_scan_work(struct work_struct *work)
1936 mutex_lock(&os->os_lock); 1936 mutex_lock(&os->os_lock);
1937 ocfs2_queue_orphan_scan(osb); 1937 ocfs2_queue_orphan_scan(osb);
1938 if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) 1938 if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
1939 schedule_delayed_work(&os->os_orphan_scan_work, 1939 queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
1940 ocfs2_orphan_scan_timeout()); 1940 ocfs2_orphan_scan_timeout());
1941 mutex_unlock(&os->os_lock); 1941 mutex_unlock(&os->os_lock);
1942} 1942}
@@ -1976,8 +1976,8 @@ void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
1976 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); 1976 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
1977 else { 1977 else {
1978 atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE); 1978 atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
1979 schedule_delayed_work(&os->os_orphan_scan_work, 1979 queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
1980 ocfs2_orphan_scan_timeout()); 1980 ocfs2_orphan_scan_timeout());
1981 } 1981 }
1982} 1982}
1983 1983
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 3d7419682dc0..ec6adbf8f551 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -118,6 +118,7 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
118{ 118{
119 unsigned int la_mb; 119 unsigned int la_mb;
120 unsigned int gd_mb; 120 unsigned int gd_mb;
121 unsigned int la_max_mb;
121 unsigned int megs_per_slot; 122 unsigned int megs_per_slot;
122 struct super_block *sb = osb->sb; 123 struct super_block *sb = osb->sb;
123 124
@@ -182,6 +183,12 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
182 if (megs_per_slot < la_mb) 183 if (megs_per_slot < la_mb)
183 la_mb = megs_per_slot; 184 la_mb = megs_per_slot;
184 185
 186 /* We can't store more bits than fit in a block. */
187 la_max_mb = ocfs2_clusters_to_megabytes(osb->sb,
188 ocfs2_local_alloc_size(sb) * 8);
189 if (la_mb > la_max_mb)
190 la_mb = la_max_mb;
191
185 return la_mb; 192 return la_mb;
186} 193}
187 194
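The new cap bounds the local alloc window by the number of bits its bitmap block can actually hold. Illustrative arithmetic, with assumed sizes rather than values computed from a real superblock:

	/* assume ocfs2_local_alloc_size(sb) = 3900 bytes of bitmap and
	 * 4 KB clusters:
	 *   3900 * 8 = 31200 bits  -> at most 31200 clusters
	 *   31200 clusters * 4 KB  = ~121 MB = la_max_mb
	 * any larger la_mb could not be represented in the bitmap */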
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 2bb35fe00511..4607923eb24c 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -775,7 +775,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
775 * locking allocators ranks above a transaction start 775 * locking allocators ranks above a transaction start
776 */ 776 */
777 WARN_ON(journal_current_handle()); 777 WARN_ON(journal_current_handle());
778 status = ocfs2_extend_no_holes(gqinode, 778 status = ocfs2_extend_no_holes(gqinode, NULL,
779 gqinode->i_size + (need_alloc << sb->s_blocksize_bits), 779 gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
780 gqinode->i_size); 780 gqinode->i_size);
781 if (status < 0) 781 if (status < 0)
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 8bd70d4d184d..dc78764ccc4c 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -971,7 +971,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
971 u64 p_blkno; 971 u64 p_blkno;
972 972
973 /* We are protected by dqio_sem so no locking needed */ 973 /* We are protected by dqio_sem so no locking needed */
974 status = ocfs2_extend_no_holes(lqinode, 974 status = ocfs2_extend_no_holes(lqinode, NULL,
975 lqinode->i_size + 2 * sb->s_blocksize, 975 lqinode->i_size + 2 * sb->s_blocksize,
976 lqinode->i_size); 976 lqinode->i_size);
977 if (status < 0) { 977 if (status < 0) {
@@ -1114,7 +1114,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1114 return ocfs2_local_quota_add_chunk(sb, type, offset); 1114 return ocfs2_local_quota_add_chunk(sb, type, offset);
1115 1115
1116 /* We are protected by dqio_sem so no locking needed */ 1116 /* We are protected by dqio_sem so no locking needed */
1117 status = ocfs2_extend_no_holes(lqinode, 1117 status = ocfs2_extend_no_holes(lqinode, NULL,
1118 lqinode->i_size + sb->s_blocksize, 1118 lqinode->i_size + sb->s_blocksize,
1119 lqinode->i_size); 1119 lqinode->i_size);
1120 if (status < 0) { 1120 if (status < 0) {
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 4793f36f6518..3ac5aa733e9c 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2931,6 +2931,12 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2931 2931
2932 offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; 2932 offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
2933 end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits); 2933 end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits);
2934 /*
 2935 * We only duplicate pages until we reach the page containing i_size - 1.
2936 * So trim 'end' to i_size.
2937 */
2938 if (end > i_size_read(context->inode))
2939 end = i_size_read(context->inode);
2934 2940
2935 while (offset < end) { 2941 while (offset < end) {
2936 page_index = offset >> PAGE_CACHE_SHIFT; 2942 page_index = offset >> PAGE_CACHE_SHIFT;
@@ -4166,6 +4172,12 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
4166 struct inode *inode = old_dentry->d_inode; 4172 struct inode *inode = old_dentry->d_inode;
4167 struct buffer_head *new_bh = NULL; 4173 struct buffer_head *new_bh = NULL;
4168 4174
4175 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
4176 ret = -EINVAL;
4177 mlog_errno(ret);
4178 goto out;
4179 }
4180
4169 ret = filemap_fdatawrite(inode->i_mapping); 4181 ret = filemap_fdatawrite(inode->i_mapping);
4170 if (ret) { 4182 if (ret) {
4171 mlog_errno(ret); 4183 mlog_errno(ret);
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index f4c2a9eb8c4d..a8e6a95a353f 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -741,7 +741,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
741 le16_to_cpu(bg->bg_free_bits_count)); 741 le16_to_cpu(bg->bg_free_bits_count));
742 le32_add_cpu(&cl->cl_recs[alloc_rec].c_total, 742 le32_add_cpu(&cl->cl_recs[alloc_rec].c_total,
743 le16_to_cpu(bg->bg_bits)); 743 le16_to_cpu(bg->bg_bits));
744 cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg->bg_blkno); 744 cl->cl_recs[alloc_rec].c_blkno = bg->bg_blkno;
745 if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count)) 745 if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count))
746 le16_add_cpu(&cl->cl_next_free_rec, 1); 746 le16_add_cpu(&cl->cl_next_free_rec, 1);
747 747
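bg_blkno is stored on disk as __le64, so wrapping it in cpu_to_le64() swapped the bytes a second time on big-endian hosts; assigning the already-little-endian value is the fix. The conversion rules, shown with a hypothetical record:

	struct example_rec {
		__le64 d_blkno;			/* on-disk, little-endian */
	} rec = { .d_blkno = cpu_to_le64(12345) };
	u64 blkno;

	blkno = le64_to_cpu(rec.d_blkno);	/* disk -> CPU order */
	rec.d_blkno = cpu_to_le64(blkno);	/* CPU -> disk order */

	/* wrong: the field is already __le64, so this double-swaps on
	 * big-endian (and sparse warns about it):
	 *	rec.d_blkno = cpu_to_le64(rec.d_blkno); */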
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index e97b34842cfe..d03469f61801 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -709,7 +709,7 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
709 struct ocfs2_xattr_value_buf *vb, 709 struct ocfs2_xattr_value_buf *vb,
710 struct ocfs2_xattr_set_ctxt *ctxt) 710 struct ocfs2_xattr_set_ctxt *ctxt)
711{ 711{
712 int status = 0; 712 int status = 0, credits;
713 handle_t *handle = ctxt->handle; 713 handle_t *handle = ctxt->handle;
714 enum ocfs2_alloc_restarted why; 714 enum ocfs2_alloc_restarted why;
715 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); 715 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
@@ -719,38 +719,54 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
719 719
720 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 720 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
721 721
722 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 722 while (clusters_to_add) {
723 OCFS2_JOURNAL_ACCESS_WRITE); 723 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
724 if (status < 0) { 724 OCFS2_JOURNAL_ACCESS_WRITE);
725 mlog_errno(status); 725 if (status < 0) {
726 goto leave; 726 mlog_errno(status);
727 } 727 break;
728 }
728 729
729 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 730 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
730 status = ocfs2_add_clusters_in_btree(handle, 731 status = ocfs2_add_clusters_in_btree(handle,
731 &et, 732 &et,
732 &logical_start, 733 &logical_start,
733 clusters_to_add, 734 clusters_to_add,
734 0, 735 0,
735 ctxt->data_ac, 736 ctxt->data_ac,
736 ctxt->meta_ac, 737 ctxt->meta_ac,
737 &why); 738 &why);
738 if (status < 0) { 739 if ((status < 0) && (status != -EAGAIN)) {
739 mlog_errno(status); 740 if (status != -ENOSPC)
740 goto leave; 741 mlog_errno(status);
741 } 742 break;
743 }
742 744
743 ocfs2_journal_dirty(handle, vb->vb_bh); 745 ocfs2_journal_dirty(handle, vb->vb_bh);
744 746
745 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters; 747 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) -
748 prev_clusters;
746 749
747 /* 750 if (why != RESTART_NONE && clusters_to_add) {
748 * We should have already allocated enough space before the transaction, 751 /*
 749 * so no need to restart. 752 * We can only fail if the alloc file doesn't give us
 750 */ 753 * enough clusters.
751 BUG_ON(why != RESTART_NONE || clusters_to_add); 754 */
752 755 BUG_ON(why == RESTART_META);
753leave: 756
757 mlog(0, "restarting xattr value extension for %u"
758 " clusters,.\n", clusters_to_add);
759 credits = ocfs2_calc_extend_credits(inode->i_sb,
760 &vb->vb_xv->xr_list,
761 clusters_to_add);
762 status = ocfs2_extend_trans(handle, credits);
763 if (status < 0) {
764 status = -ENOMEM;
765 mlog_errno(status);
766 break;
767 }
768 }
769 }
754 770
755 return status; 771 return status;
756} 772}
@@ -6788,16 +6804,15 @@ out:
6788 return ret; 6804 return ret;
6789} 6805}
6790 6806
6791static int ocfs2_reflink_xattr_buckets(handle_t *handle, 6807static int ocfs2_reflink_xattr_bucket(handle_t *handle,
6792 u64 blkno, u64 new_blkno, u32 clusters, 6808 u64 blkno, u64 new_blkno, u32 clusters,
6809 u32 *cpos, int num_buckets,
6793 struct ocfs2_alloc_context *meta_ac, 6810 struct ocfs2_alloc_context *meta_ac,
6794 struct ocfs2_alloc_context *data_ac, 6811 struct ocfs2_alloc_context *data_ac,
6795 struct ocfs2_reflink_xattr_tree_args *args) 6812 struct ocfs2_reflink_xattr_tree_args *args)
6796{ 6813{
6797 int i, j, ret = 0; 6814 int i, j, ret = 0;
6798 struct super_block *sb = args->reflink->old_inode->i_sb; 6815 struct super_block *sb = args->reflink->old_inode->i_sb;
6799 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
6800 u32 num_buckets = clusters * bpc;
6801 int bpb = args->old_bucket->bu_blocks; 6816 int bpb = args->old_bucket->bu_blocks;
6802 struct ocfs2_xattr_value_buf vb = { 6817 struct ocfs2_xattr_value_buf vb = {
6803 .vb_access = ocfs2_journal_access, 6818 .vb_access = ocfs2_journal_access,
@@ -6816,14 +6831,6 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6816 break; 6831 break;
6817 } 6832 }
6818 6833
6819 /*
6820 * The real bucket num in this series of blocks is stored
6821 * in the 1st bucket.
6822 */
6823 if (i == 0)
6824 num_buckets = le16_to_cpu(
6825 bucket_xh(args->old_bucket)->xh_num_buckets);
6826
6827 ret = ocfs2_xattr_bucket_journal_access(handle, 6834 ret = ocfs2_xattr_bucket_journal_access(handle,
6828 args->new_bucket, 6835 args->new_bucket,
6829 OCFS2_JOURNAL_ACCESS_CREATE); 6836 OCFS2_JOURNAL_ACCESS_CREATE);
@@ -6837,6 +6844,18 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6837 bucket_block(args->old_bucket, j), 6844 bucket_block(args->old_bucket, j),
6838 sb->s_blocksize); 6845 sb->s_blocksize);
6839 6846
6847 /*
6848 * Record the start cpos so that we can use it to initialize
 6849 * our xattr tree. We also set the xh_num_buckets for the new
6850 * bucket.
6851 */
6852 if (i == 0) {
6853 *cpos = le32_to_cpu(bucket_xh(args->new_bucket)->
6854 xh_entries[0].xe_name_hash);
6855 bucket_xh(args->new_bucket)->xh_num_buckets =
6856 cpu_to_le16(num_buckets);
6857 }
6858
6840 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6859 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6841 6860
6842 ret = ocfs2_reflink_xattr_header(handle, args->reflink, 6861 ret = ocfs2_reflink_xattr_header(handle, args->reflink,
@@ -6866,6 +6885,7 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6866 } 6885 }
6867 6886
6868 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6887 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6888
6869 ocfs2_xattr_bucket_relse(args->old_bucket); 6889 ocfs2_xattr_bucket_relse(args->old_bucket);
6870 ocfs2_xattr_bucket_relse(args->new_bucket); 6890 ocfs2_xattr_bucket_relse(args->new_bucket);
6871 } 6891 }
@@ -6874,6 +6894,75 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6874 ocfs2_xattr_bucket_relse(args->new_bucket); 6894 ocfs2_xattr_bucket_relse(args->new_bucket);
6875 return ret; 6895 return ret;
6876} 6896}
6897
6898static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6899 struct inode *inode,
6900 struct ocfs2_reflink_xattr_tree_args *args,
6901 struct ocfs2_extent_tree *et,
6902 struct ocfs2_alloc_context *meta_ac,
6903 struct ocfs2_alloc_context *data_ac,
6904 u64 blkno, u32 cpos, u32 len)
6905{
6906 int ret, first_inserted = 0;
6907 u32 p_cluster, num_clusters, reflink_cpos = 0;
6908 u64 new_blkno;
6909 unsigned int num_buckets, reflink_buckets;
6910 unsigned int bpc =
6911 ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
6912
6913 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
6914 if (ret) {
6915 mlog_errno(ret);
6916 goto out;
6917 }
6918 num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets);
6919 ocfs2_xattr_bucket_relse(args->old_bucket);
6920
6921 while (len && num_buckets) {
6922 ret = ocfs2_claim_clusters(handle, data_ac,
6923 1, &p_cluster, &num_clusters);
6924 if (ret) {
6925 mlog_errno(ret);
6926 goto out;
6927 }
6928
6929 new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
6930 reflink_buckets = min(num_buckets, bpc * num_clusters);
6931
6932 ret = ocfs2_reflink_xattr_bucket(handle, blkno,
6933 new_blkno, num_clusters,
6934 &reflink_cpos, reflink_buckets,
6935 meta_ac, data_ac, args);
6936 if (ret) {
6937 mlog_errno(ret);
6938 goto out;
6939 }
6940
6941 /*
6942 * For the 1st allocated cluster, we make it use the same cpos
6943 * so that the xattr tree looks the same as the original one
6944 * in most cases.
6945 */
6946 if (!first_inserted) {
6947 reflink_cpos = cpos;
6948 first_inserted = 1;
6949 }
6950 ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno,
6951 num_clusters, 0, meta_ac);
6952 if (ret)
6953 mlog_errno(ret);
6954
6955 mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
6956 (unsigned long long)new_blkno, num_clusters, reflink_cpos);
6957
6958 len -= num_clusters;
6959 blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
6960 num_buckets -= reflink_buckets;
6961 }
6962out:
6963 return ret;
6964}
6965
6877/* 6966/*
6878 * Create the same xattr extent record in the new inode's xattr tree. 6967 * Create the same xattr extent record in the new inode's xattr tree.
6879 */ 6968 */
@@ -6885,8 +6974,6 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
6885 void *para) 6974 void *para)
6886{ 6975{
6887 int ret, credits = 0; 6976 int ret, credits = 0;
6888 u32 p_cluster, num_clusters;
6889 u64 new_blkno;
6890 handle_t *handle; 6977 handle_t *handle;
6891 struct ocfs2_reflink_xattr_tree_args *args = 6978 struct ocfs2_reflink_xattr_tree_args *args =
6892 (struct ocfs2_reflink_xattr_tree_args *)para; 6979 (struct ocfs2_reflink_xattr_tree_args *)para;
@@ -6895,6 +6982,9 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
6895 struct ocfs2_alloc_context *data_ac = NULL; 6982 struct ocfs2_alloc_context *data_ac = NULL;
6896 struct ocfs2_extent_tree et; 6983 struct ocfs2_extent_tree et;
6897 6984
6985 mlog(0, "reflink xattr buckets %llu len %u\n",
6986 (unsigned long long)blkno, len);
6987
6898 ocfs2_init_xattr_tree_extent_tree(&et, 6988 ocfs2_init_xattr_tree_extent_tree(&et,
6899 INODE_CACHE(args->reflink->new_inode), 6989 INODE_CACHE(args->reflink->new_inode),
6900 args->new_blk_bh); 6990 args->new_blk_bh);
@@ -6914,32 +7004,12 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
6914 goto out; 7004 goto out;
6915 } 7005 }
6916 7006
6917 ret = ocfs2_claim_clusters(handle, data_ac, 7007 ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et,
6918 len, &p_cluster, &num_clusters); 7008 meta_ac, data_ac,
6919 if (ret) { 7009 blkno, cpos, len);
6920 mlog_errno(ret);
6921 goto out_commit;
6922 }
6923
6924 new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster);
6925
6926 mlog(0, "reflink xattr buckets %llu to %llu, len %u\n",
6927 (unsigned long long)blkno, (unsigned long long)new_blkno, len);
6928 ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len,
6929 meta_ac, data_ac, args);
6930 if (ret) {
6931 mlog_errno(ret);
6932 goto out_commit;
6933 }
6934
6935 mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
6936 (unsigned long long)new_blkno, len, cpos);
6937 ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno,
6938 len, 0, meta_ac);
6939 if (ret) 7010 if (ret)
6940 mlog_errno(ret); 7011 mlog_errno(ret);
6941 7012
6942out_commit:
6943 ocfs2_commit_trans(osb, handle); 7013 ocfs2_commit_trans(osb, handle);
6944 7014
6945out: 7015out:
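
The split into ocfs2_reflink_xattr_bucket() and the new ocfs2_reflink_xattr_buckets() driver hinges on one piece of arithmetic: each claimed extent can hold at most bpc * num_clusters buckets, so each pass copies min(num_buckets, bpc * num_clusters) and carries the remainder into the next cluster claim. A self-contained sketch of that accounting with made-up numbers:

#include <stdio.h>

int main(void)
{
	unsigned int bpc = 4;		/* buckets per cluster (example) */
	unsigned int num_buckets = 10;	/* from the first bucket's header */
	unsigned int len = 8;		/* clusters left to reflink */

	while (len && num_buckets) {
		unsigned int num_clusters = 2;	/* toy allocator result */
		unsigned int cap = bpc * num_clusters;
		unsigned int reflink_buckets =
			num_buckets < cap ? num_buckets : cap;

		printf("copy %u buckets into %u clusters\n",
		       reflink_buckets, num_clusters);
		len -= num_clusters;
		num_buckets -= reflink_buckets;
	}
	return 0;
}
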
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
index 3e73de5967ff..fc8497643fd0 100644
--- a/fs/partitions/ibm.c
+++ b/fs/partitions/ibm.c
@@ -74,6 +74,7 @@ int ibm_partition(struct parsed_partitions *state)
74 } *label; 74 } *label;
75 unsigned char *data; 75 unsigned char *data;
76 Sector sect; 76 Sector sect;
77 sector_t labelsect;
77 78
78 res = 0; 79 res = 0;
79 blocksize = bdev_logical_block_size(bdev); 80 blocksize = bdev_logical_block_size(bdev);
@@ -98,10 +99,19 @@ int ibm_partition(struct parsed_partitions *state)
98 goto out_freeall; 99 goto out_freeall;
99 100
100 /* 101 /*
102 * Special case for FBA disks: label sector does not depend on
103 * blocksize.
104 */
105 if ((info->cu_type == 0x6310 && info->dev_type == 0x9336) ||
106 (info->cu_type == 0x3880 && info->dev_type == 0x3370))
107 labelsect = info->label_block;
108 else
109 labelsect = info->label_block * (blocksize >> 9);
110
111 /*
101 * Get volume label, extract name and type. 112 * Get volume label, extract name and type.
102 */ 113 */
103 data = read_part_sector(state, info->label_block*(blocksize/512), 114 data = read_part_sector(state, labelsect, &sect);
104 &sect);
105 if (data == NULL) 115 if (data == NULL)
106 goto out_readerr; 116 goto out_readerr;
107 117
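
The new labelsect rule restated as a self-contained function (the device IDs are copied from the hunk; the main() inputs are invented for illustration): FBA disks keep the volume label at label_block regardless of block size, while everything else scales it by blocksize / 512.

#include <stdio.h>

typedef unsigned long long sector_t;

static sector_t label_sector(unsigned int cu_type, unsigned int dev_type,
			     unsigned int label_block,
			     unsigned int blocksize)
{
	/* FBA: a 9336 behind a 6310, or a 3370 behind a 3880 */
	if ((cu_type == 0x6310 && dev_type == 0x9336) ||
	    (cu_type == 0x3880 && dev_type == 0x3370))
		return label_block;
	return (sector_t)label_block * (blocksize >> 9);
}

int main(void)
{
	/* ECKD-style disk, 4k blocks: label moves to 512-byte sector 16 */
	printf("%llu\n", label_sector(0x3990, 0x3390, 2, 4096));
	/* FBA disk: label stays at block 2 whatever the blocksize */
	printf("%llu\n", label_sector(0x3880, 0x3370, 2, 4096));
	return 0;
}
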
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 12c233da1b6b..437d2ca2de97 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -676,7 +676,7 @@ static void prune_dqcache(int count)
676 * This is called from kswapd when we think we need some 676 * This is called from kswapd when we think we need some
677 * more memory 677 * more memory
678 */ 678 */
679static int shrink_dqcache_memory(int nr, gfp_t gfp_mask) 679static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
680{ 680{
681 if (nr) { 681 if (nr) {
682 spin_lock(&dq_list_lock); 682 spin_lock(&dq_list_lock);
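
This hunk is one instance of the tree-wide signature change running through this merge: shrink callbacks now receive the registered struct shrinker as their first argument, so a callback can find its owner instead of relying on global state. The shape every converted callback follows, sketched with placeholder helpers (count_objects() and scan_objects() are hypothetical, not kernel functions, and this is not a buildable module):

static int example_shrink(struct shrinker *shrink, int nr_to_scan,
			  gfp_t gfp_mask)
{
	if (!nr_to_scan)
		return count_objects();	/* query pass: how much is there? */
	if (!(gfp_mask & __GFP_FS))
		return -1;		/* can't re-enter the filesystem */
	scan_objects(nr_to_scan);
	return count_objects();
}

static struct shrinker example_shrinker = {
	.shrink	= example_shrink,
	.seeks	= DEFAULT_SEEKS,
};
/* register_shrinker(&example_shrinker) at init time,
 * unregister_shrinker(&example_shrinker) on teardown */
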
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index f71246bebfe4..a7ac78f8e67a 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -28,6 +28,7 @@ static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
28 struct sysfs_dirent *target_sd = NULL; 28 struct sysfs_dirent *target_sd = NULL;
29 struct sysfs_dirent *sd = NULL; 29 struct sysfs_dirent *sd = NULL;
30 struct sysfs_addrm_cxt acxt; 30 struct sysfs_addrm_cxt acxt;
31 enum kobj_ns_type ns_type;
31 int error; 32 int error;
32 33
33 BUG_ON(!name); 34 BUG_ON(!name);
@@ -58,16 +59,29 @@ static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
58 if (!sd) 59 if (!sd)
59 goto out_put; 60 goto out_put;
60 61
61 if (sysfs_ns_type(parent_sd)) 62 ns_type = sysfs_ns_type(parent_sd);
63 if (ns_type)
62 sd->s_ns = target->ktype->namespace(target); 64 sd->s_ns = target->ktype->namespace(target);
63 sd->s_symlink.target_sd = target_sd; 65 sd->s_symlink.target_sd = target_sd;
64 target_sd = NULL; /* reference is now owned by the symlink */ 66 target_sd = NULL; /* reference is now owned by the symlink */
65 67
66 sysfs_addrm_start(&acxt, parent_sd); 68 sysfs_addrm_start(&acxt, parent_sd);
67 if (warn) 69 /* Symlinks must be between directories with the same ns_type */
68 error = sysfs_add_one(&acxt, sd); 70 if (!ns_type ||
69 else 71 (ns_type == sysfs_ns_type(sd->s_symlink.target_sd->s_parent))) {
70 error = __sysfs_add_one(&acxt, sd); 72 if (warn)
73 error = sysfs_add_one(&acxt, sd);
74 else
75 error = __sysfs_add_one(&acxt, sd);
76 } else {
77 error = -EINVAL;
78 WARN(1, KERN_WARNING
79 "sysfs: symlink across ns_types %s/%s -> %s/%s\n",
80 parent_sd->s_name,
81 sd->s_name,
82 sd->s_symlink.target_sd->s_parent->s_name,
83 sd->s_symlink.target_sd->s_name);
84 }
71 sysfs_addrm_finish(&acxt); 85 sysfs_addrm_finish(&acxt);
72 86
73 if (error) 87 if (error)
@@ -122,7 +136,7 @@ void sysfs_delete_link(struct kobject *kobj, struct kobject *targ,
122{ 136{
123 const void *ns = NULL; 137 const void *ns = NULL;
124 spin_lock(&sysfs_assoc_lock); 138 spin_lock(&sysfs_assoc_lock);
125 if (targ->sd) 139 if (targ->sd && sysfs_ns_type(kobj->sd))
126 ns = targ->sd->s_ns; 140 ns = targ->sd->s_ns;
127 spin_unlock(&sysfs_assoc_lock); 141 spin_unlock(&sysfs_assoc_lock);
128 sysfs_hash_and_remove(kobj->sd, ns, name); 142 sysfs_hash_and_remove(kobj->sd, ns, name);
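
The added check reduces to a small predicate: a symlink may be created when the parent directory carries no namespace tag, or when both endpoints' parents carry the same tag type. A stand-alone restatement (the enum is trimmed to two illustrative values):

#include <stdbool.h>
#include <stdio.h>

enum kobj_ns_type { KOBJ_NS_TYPE_NONE, KOBJ_NS_TYPE_NET };

static bool symlink_allowed(enum kobj_ns_type parent_ns_type,
			    enum kobj_ns_type target_parent_ns_type)
{
	return parent_ns_type == KOBJ_NS_TYPE_NONE ||
	       parent_ns_type == target_parent_ns_type;
}

int main(void)
{
	printf("%d\n", symlink_allowed(KOBJ_NS_TYPE_NONE,
				       KOBJ_NS_TYPE_NET));	/* 1: ok */
	printf("%d\n", symlink_allowed(KOBJ_NS_TYPE_NET,
				       KOBJ_NS_TYPE_NONE));	/* 0: -EINVAL */
	return 0;
}
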
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
index 02feb59cefca..0b201114a5ad 100644
--- a/fs/ubifs/shrinker.c
+++ b/fs/ubifs/shrinker.c
@@ -277,7 +277,7 @@ static int kick_a_thread(void)
277 return 0; 277 return 0;
278} 278}
279 279
280int ubifs_shrinker(int nr, gfp_t gfp_mask) 280int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask)
281{ 281{
282 int freed, contention = 0; 282 int freed, contention = 0;
283 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); 283 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 2eef553d50c8..04310878f449 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1575,7 +1575,7 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot);
1575int ubifs_tnc_end_commit(struct ubifs_info *c); 1575int ubifs_tnc_end_commit(struct ubifs_info *c);
1576 1576
1577/* shrinker.c */ 1577/* shrinker.c */
1578int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask); 1578int ubifs_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
1579 1579
1580/* commit.c */ 1580/* commit.c */
1581int ubifs_bg_thread(void *info); 1581int ubifs_bg_thread(void *info);
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 649ade8ef598..2ee3f7a60163 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -45,7 +45,7 @@
45 45
46static kmem_zone_t *xfs_buf_zone; 46static kmem_zone_t *xfs_buf_zone;
47STATIC int xfsbufd(void *); 47STATIC int xfsbufd(void *);
48STATIC int xfsbufd_wakeup(int, gfp_t); 48STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t);
49STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); 49STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
50static struct shrinker xfs_buf_shake = { 50static struct shrinker xfs_buf_shake = {
51 .shrink = xfsbufd_wakeup, 51 .shrink = xfsbufd_wakeup,
@@ -340,7 +340,7 @@ _xfs_buf_lookup_pages(
340 __func__, gfp_mask); 340 __func__, gfp_mask);
341 341
342 XFS_STATS_INC(xb_page_retries); 342 XFS_STATS_INC(xb_page_retries);
343 xfsbufd_wakeup(0, gfp_mask); 343 xfsbufd_wakeup(NULL, 0, gfp_mask);
344 congestion_wait(BLK_RW_ASYNC, HZ/50); 344 congestion_wait(BLK_RW_ASYNC, HZ/50);
345 goto retry; 345 goto retry;
346 } 346 }
@@ -1762,6 +1762,7 @@ xfs_buf_runall_queues(
1762 1762
1763STATIC int 1763STATIC int
1764xfsbufd_wakeup( 1764xfsbufd_wakeup(
1765 struct shrinker *shrink,
1765 int priority, 1766 int priority,
1766 gfp_t mask) 1767 gfp_t mask)
1767{ 1768{
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index f2d1718c9165..80938c736c27 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1883,7 +1883,6 @@ init_xfs_fs(void)
1883 goto out_cleanup_procfs; 1883 goto out_cleanup_procfs;
1884 1884
1885 vfs_initquota(); 1885 vfs_initquota();
1886 xfs_inode_shrinker_init();
1887 1886
1888 error = register_filesystem(&xfs_fs_type); 1887 error = register_filesystem(&xfs_fs_type);
1889 if (error) 1888 if (error)
@@ -1911,7 +1910,6 @@ exit_xfs_fs(void)
1911{ 1910{
1912 vfs_exitquota(); 1911 vfs_exitquota();
1913 unregister_filesystem(&xfs_fs_type); 1912 unregister_filesystem(&xfs_fs_type);
1914 xfs_inode_shrinker_destroy();
1915 xfs_sysctl_unregister(); 1913 xfs_sysctl_unregister();
1916 xfs_cleanup_procfs(); 1914 xfs_cleanup_procfs();
1917 xfs_buf_terminate(); 1915 xfs_buf_terminate();
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index ef7f0218bccb..a51a07c3a70c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -144,6 +144,41 @@ restart:
144 return last_error; 144 return last_error;
145} 145}
146 146
147/*
148 * Select the next per-ag structure to iterate during the walk. The reclaim
149 * walk is optimised to walk only AGs with reclaimable inodes in them.
150 */
151static struct xfs_perag *
152xfs_inode_ag_iter_next_pag(
153 struct xfs_mount *mp,
154 xfs_agnumber_t *first,
155 int tag)
156{
157 struct xfs_perag *pag = NULL;
158
159 if (tag == XFS_ICI_RECLAIM_TAG) {
160 int found;
161 int ref;
162
163 spin_lock(&mp->m_perag_lock);
164 found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
165 (void **)&pag, *first, 1, tag);
166 if (found <= 0) {
167 spin_unlock(&mp->m_perag_lock);
168 return NULL;
169 }
170 *first = pag->pag_agno + 1;
171 /* open coded pag reference increment */
172 ref = atomic_inc_return(&pag->pag_ref);
173 spin_unlock(&mp->m_perag_lock);
174 trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_);
175 } else {
176 pag = xfs_perag_get(mp, *first);
177 (*first)++;
178 }
179 return pag;
180}
181
147int 182int
148xfs_inode_ag_iterator( 183xfs_inode_ag_iterator(
149 struct xfs_mount *mp, 184 struct xfs_mount *mp,
@@ -154,16 +189,15 @@ xfs_inode_ag_iterator(
154 int exclusive, 189 int exclusive,
155 int *nr_to_scan) 190 int *nr_to_scan)
156{ 191{
192 struct xfs_perag *pag;
157 int error = 0; 193 int error = 0;
158 int last_error = 0; 194 int last_error = 0;
159 xfs_agnumber_t ag; 195 xfs_agnumber_t ag;
160 int nr; 196 int nr;
161 197
162 nr = nr_to_scan ? *nr_to_scan : INT_MAX; 198 nr = nr_to_scan ? *nr_to_scan : INT_MAX;
163 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { 199 ag = 0;
164 struct xfs_perag *pag; 200 while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) {
165
166 pag = xfs_perag_get(mp, ag);
167 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, 201 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
168 exclusive, &nr); 202 exclusive, &nr);
169 xfs_perag_put(pag); 203 xfs_perag_put(pag);
@@ -640,6 +674,17 @@ __xfs_inode_set_reclaim_tag(
640 radix_tree_tag_set(&pag->pag_ici_root, 674 radix_tree_tag_set(&pag->pag_ici_root,
641 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), 675 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
642 XFS_ICI_RECLAIM_TAG); 676 XFS_ICI_RECLAIM_TAG);
677
678 if (!pag->pag_ici_reclaimable) {
679 /* propagate the reclaim tag up into the perag radix tree */
680 spin_lock(&ip->i_mount->m_perag_lock);
681 radix_tree_tag_set(&ip->i_mount->m_perag_tree,
682 XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
683 XFS_ICI_RECLAIM_TAG);
684 spin_unlock(&ip->i_mount->m_perag_lock);
685 trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
686 -1, _RET_IP_);
687 }
643 pag->pag_ici_reclaimable++; 688 pag->pag_ici_reclaimable++;
644} 689}
645 690
@@ -674,6 +719,16 @@ __xfs_inode_clear_reclaim_tag(
674 radix_tree_tag_clear(&pag->pag_ici_root, 719 radix_tree_tag_clear(&pag->pag_ici_root,
675 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); 720 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
676 pag->pag_ici_reclaimable--; 721 pag->pag_ici_reclaimable--;
722 if (!pag->pag_ici_reclaimable) {
723 /* clear the reclaim tag from the perag radix tree */
724 spin_lock(&ip->i_mount->m_perag_lock);
725 radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
726 XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
727 XFS_ICI_RECLAIM_TAG);
728 spin_unlock(&ip->i_mount->m_perag_lock);
729 trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
730 -1, _RET_IP_);
731 }
677} 732}
678 733
679/* 734/*
@@ -828,83 +883,52 @@ xfs_reclaim_inodes(
828 883
829/* 884/*
830 * Shrinker infrastructure. 885 * Shrinker infrastructure.
831 *
832 * This is all far more complex than it needs to be. It adds a global list of
833 * mounts because the shrinkers can only call a global context. We need to make
834 * the shrinkers pass a context to avoid the need for global state.
835 */ 886 */
836static LIST_HEAD(xfs_mount_list);
837static struct rw_semaphore xfs_mount_list_lock;
838
839static int 887static int
840xfs_reclaim_inode_shrink( 888xfs_reclaim_inode_shrink(
889 struct shrinker *shrink,
841 int nr_to_scan, 890 int nr_to_scan,
842 gfp_t gfp_mask) 891 gfp_t gfp_mask)
843{ 892{
844 struct xfs_mount *mp; 893 struct xfs_mount *mp;
845 struct xfs_perag *pag; 894 struct xfs_perag *pag;
846 xfs_agnumber_t ag; 895 xfs_agnumber_t ag;
847 int reclaimable = 0; 896 int reclaimable;
848 897
898 mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
849 if (nr_to_scan) { 899 if (nr_to_scan) {
850 if (!(gfp_mask & __GFP_FS)) 900 if (!(gfp_mask & __GFP_FS))
851 return -1; 901 return -1;
852 902
853 down_read(&xfs_mount_list_lock); 903 xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
854 list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
855 xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
856 XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan); 904 XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
857 if (nr_to_scan <= 0) 905 /* if we don't exhaust the scan, don't bother coming back */
858 break; 906 if (nr_to_scan > 0)
859 } 907 return -1;
860 up_read(&xfs_mount_list_lock); 908 }
861 }
862 909
863 down_read(&xfs_mount_list_lock); 910 reclaimable = 0;
864 list_for_each_entry(mp, &xfs_mount_list, m_mplist) { 911 ag = 0;
865 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { 912 while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag,
866 pag = xfs_perag_get(mp, ag); 913 XFS_ICI_RECLAIM_TAG))) {
867 reclaimable += pag->pag_ici_reclaimable; 914 reclaimable += pag->pag_ici_reclaimable;
868 xfs_perag_put(pag); 915 xfs_perag_put(pag);
869 }
870 } 916 }
871 up_read(&xfs_mount_list_lock);
872 return reclaimable; 917 return reclaimable;
873} 918}
874 919
875static struct shrinker xfs_inode_shrinker = {
876 .shrink = xfs_reclaim_inode_shrink,
877 .seeks = DEFAULT_SEEKS,
878};
879
880void __init
881xfs_inode_shrinker_init(void)
882{
883 init_rwsem(&xfs_mount_list_lock);
884 register_shrinker(&xfs_inode_shrinker);
885}
886
887void
888xfs_inode_shrinker_destroy(void)
889{
890 ASSERT(list_empty(&xfs_mount_list));
891 unregister_shrinker(&xfs_inode_shrinker);
892}
893
894void 920void
895xfs_inode_shrinker_register( 921xfs_inode_shrinker_register(
896 struct xfs_mount *mp) 922 struct xfs_mount *mp)
897{ 923{
898 down_write(&xfs_mount_list_lock); 924 mp->m_inode_shrink.shrink = xfs_reclaim_inode_shrink;
899 list_add_tail(&mp->m_mplist, &xfs_mount_list); 925 mp->m_inode_shrink.seeks = DEFAULT_SEEKS;
900 up_write(&xfs_mount_list_lock); 926 register_shrinker(&mp->m_inode_shrink);
901} 927}
902 928
903void 929void
904xfs_inode_shrinker_unregister( 930xfs_inode_shrinker_unregister(
905 struct xfs_mount *mp) 931 struct xfs_mount *mp)
906{ 932{
907 down_write(&xfs_mount_list_lock); 933 unregister_shrinker(&mp->m_inode_shrink);
908 list_del(&mp->m_mplist);
909 up_write(&xfs_mount_list_lock);
910} 934}
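
The structural change that lets all of the global-list code above disappear is embedding the struct shrinker in struct xfs_mount and recovering the mount with container_of() inside the callback. A stand-alone demonstration of that pattern with toy types (only the pointer arithmetic is the point here):

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct toy_shrinker {
	int (*shrink)(struct toy_shrinker *s, int nr);
};

struct toy_mount {
	const char *name;
	struct toy_shrinker inode_shrink;	/* embedded, like m_inode_shrink */
};

static int mount_shrink(struct toy_shrinker *s, int nr)
{
	/* recover the owning mount: no global mount list needed */
	struct toy_mount *mp =
		container_of(s, struct toy_mount, inode_shrink);

	printf("reclaiming %d inodes on %s\n", nr, mp->name);
	return 0;
}

int main(void)
{
	struct toy_mount mp = { .name = "xfs0" };

	mp.inode_shrink.shrink = mount_shrink;
	/* the core would call back with just the shrinker pointer: */
	mp.inode_shrink.shrink(&mp.inode_shrink, 128);
	return 0;
}
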
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index cdcbaaca9880..e28139aaa4aa 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -55,8 +55,6 @@ int xfs_inode_ag_iterator(struct xfs_mount *mp,
55 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), 55 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
56 int flags, int tag, int write_lock, int *nr_to_scan); 56 int flags, int tag, int write_lock, int *nr_to_scan);
57 57
58void xfs_inode_shrinker_init(void);
59void xfs_inode_shrinker_destroy(void);
60void xfs_inode_shrinker_register(struct xfs_mount *mp); 58void xfs_inode_shrinker_register(struct xfs_mount *mp);
61void xfs_inode_shrinker_unregister(struct xfs_mount *mp); 59void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
62 60
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 73d5aa117384..302820690904 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -124,7 +124,10 @@ DEFINE_EVENT(xfs_perag_class, name, \
124 unsigned long caller_ip), \ 124 unsigned long caller_ip), \
125 TP_ARGS(mp, agno, refcount, caller_ip)) 125 TP_ARGS(mp, agno, refcount, caller_ip))
126DEFINE_PERAG_REF_EVENT(xfs_perag_get); 126DEFINE_PERAG_REF_EVENT(xfs_perag_get);
127DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim);
127DEFINE_PERAG_REF_EVENT(xfs_perag_put); 128DEFINE_PERAG_REF_EVENT(xfs_perag_put);
129DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
130DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
128 131
129TRACE_EVENT(xfs_attr_list_node_descend, 132TRACE_EVENT(xfs_attr_list_node_descend,
130 TP_PROTO(struct xfs_attr_list_context *ctx, 133 TP_PROTO(struct xfs_attr_list_context *ctx,
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 8c117ff2e3ab..67c018392d62 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -69,7 +69,7 @@ STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
69 69
70STATIC int xfs_qm_init_quotainos(xfs_mount_t *); 70STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
71STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); 71STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
72STATIC int xfs_qm_shake(int, gfp_t); 72STATIC int xfs_qm_shake(struct shrinker *, int, gfp_t);
73 73
74static struct shrinker xfs_qm_shaker = { 74static struct shrinker xfs_qm_shaker = {
75 .shrink = xfs_qm_shake, 75 .shrink = xfs_qm_shake,
@@ -2117,7 +2117,10 @@ xfs_qm_shake_freelist(
2117 */ 2117 */
2118/* ARGSUSED */ 2118/* ARGSUSED */
2119STATIC int 2119STATIC int
2120xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask) 2120xfs_qm_shake(
2121 struct shrinker *shrink,
2122 int nr_to_scan,
2123 gfp_t gfp_mask)
2121{ 2124{
2122 int ndqused, nfree, n; 2125 int ndqused, nfree, n;
2123 2126
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1d2c7eed4eda..5761087ee8ea 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -259,7 +259,7 @@ typedef struct xfs_mount {
259 wait_queue_head_t m_wait_single_sync_task; 259 wait_queue_head_t m_wait_single_sync_task;
260 __int64_t m_update_flags; /* sb flags we need to update 260 __int64_t m_update_flags; /* sb flags we need to update
261 on the next remount,rw */ 261 on the next remount,rw */
262 struct list_head m_mplist; /* inode shrinker mount list */ 262 struct shrinker m_inode_shrink; /* inode reclaim shrinker */
263} xfs_mount_t; 263} xfs_mount_t;
264 264
265/* 265/*