author    | Ingo Molnar <mingo@elte.hu> | 2010-07-21 15:43:03 -0400
committer | Ingo Molnar <mingo@elte.hu> | 2010-07-21 15:43:06 -0400
commit    | 9dcdbf7a33d9018ac5d45debcf261be648bdd56a (patch)
tree      | bbcc1a018f11ff76cd7ce174ef3ffe2c02da07ee /fs
parent    | cc5edb0eb9ce892b530e34a5d110382483587942 (diff)
parent    | cd5b8f8755a89a57fc8c408d284b8b613f090345 (diff)
Merge branch 'linus' into perf/core
Merge reason: Pick up the latest perf fixes.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'fs')
55 files changed, 1019 insertions, 633 deletions
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 3dab9e9948d0..722743b152d8 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -680,7 +680,6 @@ int afs_writeback_all(struct afs_vnode *vnode) | |||
680 | { | 680 | { |
681 | struct address_space *mapping = vnode->vfs_inode.i_mapping; | 681 | struct address_space *mapping = vnode->vfs_inode.i_mapping; |
682 | struct writeback_control wbc = { | 682 | struct writeback_control wbc = { |
683 | .bdi = mapping->backing_dev_info, | ||
684 | .sync_mode = WB_SYNC_ALL, | 683 | .sync_mode = WB_SYNC_ALL, |
685 | .nr_to_write = LONG_MAX, | 684 | .nr_to_write = LONG_MAX, |
686 | .range_cyclic = 1, | 685 | .range_cyclic = 1, |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 0d1d966b0fe4..c3df14ce2cc2 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2304,12 +2304,17 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root, | |||
2304 | return ret; | 2304 | return ret; |
2305 | } | 2305 | } |
2306 | 2306 | ||
2307 | /* | ||
2308 | * min slot controls the lowest index we're willing to push to the | ||
2309 | * right. We'll push up to and including min_slot, but no lower | ||
2310 | */ | ||
2307 | static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, | 2311 | static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, |
2308 | struct btrfs_root *root, | 2312 | struct btrfs_root *root, |
2309 | struct btrfs_path *path, | 2313 | struct btrfs_path *path, |
2310 | int data_size, int empty, | 2314 | int data_size, int empty, |
2311 | struct extent_buffer *right, | 2315 | struct extent_buffer *right, |
2312 | int free_space, u32 left_nritems) | 2316 | int free_space, u32 left_nritems, |
2317 | u32 min_slot) | ||
2313 | { | 2318 | { |
2314 | struct extent_buffer *left = path->nodes[0]; | 2319 | struct extent_buffer *left = path->nodes[0]; |
2315 | struct extent_buffer *upper = path->nodes[1]; | 2320 | struct extent_buffer *upper = path->nodes[1]; |
@@ -2327,7 +2332,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, | |||
2327 | if (empty) | 2332 | if (empty) |
2328 | nr = 0; | 2333 | nr = 0; |
2329 | else | 2334 | else |
2330 | nr = 1; | 2335 | nr = max_t(u32, 1, min_slot); |
2331 | 2336 | ||
2332 | if (path->slots[0] >= left_nritems) | 2337 | if (path->slots[0] >= left_nritems) |
2333 | push_space += data_size; | 2338 | push_space += data_size; |
@@ -2469,10 +2474,14 @@ out_unlock: | |||
2469 | * | 2474 | * |
2470 | * returns 1 if the push failed because the other node didn't have enough | 2475 | * returns 1 if the push failed because the other node didn't have enough |
2471 | * room, 0 if everything worked out and < 0 if there were major errors. | 2476 | * room, 0 if everything worked out and < 0 if there were major errors. |
2477 | * | ||
2478 | * this will push starting from min_slot to the end of the leaf. It won't | ||
2479 | * push any slot lower than min_slot | ||
2472 | */ | 2480 | */ |
2473 | static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root | 2481 | static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root |
2474 | *root, struct btrfs_path *path, int data_size, | 2482 | *root, struct btrfs_path *path, |
2475 | int empty) | 2483 | int min_data_size, int data_size, |
2484 | int empty, u32 min_slot) | ||
2476 | { | 2485 | { |
2477 | struct extent_buffer *left = path->nodes[0]; | 2486 | struct extent_buffer *left = path->nodes[0]; |
2478 | struct extent_buffer *right; | 2487 | struct extent_buffer *right; |
@@ -2514,8 +2523,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2514 | if (left_nritems == 0) | 2523 | if (left_nritems == 0) |
2515 | goto out_unlock; | 2524 | goto out_unlock; |
2516 | 2525 | ||
2517 | return __push_leaf_right(trans, root, path, data_size, empty, | 2526 | return __push_leaf_right(trans, root, path, min_data_size, empty, |
2518 | right, free_space, left_nritems); | 2527 | right, free_space, left_nritems, min_slot); |
2519 | out_unlock: | 2528 | out_unlock: |
2520 | btrfs_tree_unlock(right); | 2529 | btrfs_tree_unlock(right); |
2521 | free_extent_buffer(right); | 2530 | free_extent_buffer(right); |
@@ -2525,12 +2534,17 @@ out_unlock: | |||
2525 | /* | 2534 | /* |
2526 | * push some data in the path leaf to the left, trying to free up at | 2535 | * push some data in the path leaf to the left, trying to free up at |
2527 | * least data_size bytes. returns zero if the push worked, nonzero otherwise | 2536 | * least data_size bytes. returns zero if the push worked, nonzero otherwise |
2537 | * | ||
2538 | * max_slot can put a limit on how far into the leaf we'll push items. The | ||
2539 | * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the | ||
2540 | * items | ||
2528 | */ | 2541 | */ |
2529 | static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | 2542 | static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, |
2530 | struct btrfs_root *root, | 2543 | struct btrfs_root *root, |
2531 | struct btrfs_path *path, int data_size, | 2544 | struct btrfs_path *path, int data_size, |
2532 | int empty, struct extent_buffer *left, | 2545 | int empty, struct extent_buffer *left, |
2533 | int free_space, int right_nritems) | 2546 | int free_space, u32 right_nritems, |
2547 | u32 max_slot) | ||
2534 | { | 2548 | { |
2535 | struct btrfs_disk_key disk_key; | 2549 | struct btrfs_disk_key disk_key; |
2536 | struct extent_buffer *right = path->nodes[0]; | 2550 | struct extent_buffer *right = path->nodes[0]; |
@@ -2549,9 +2563,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | |||
2549 | slot = path->slots[1]; | 2563 | slot = path->slots[1]; |
2550 | 2564 | ||
2551 | if (empty) | 2565 | if (empty) |
2552 | nr = right_nritems; | 2566 | nr = min(right_nritems, max_slot); |
2553 | else | 2567 | else |
2554 | nr = right_nritems - 1; | 2568 | nr = min(right_nritems - 1, max_slot); |
2555 | 2569 | ||
2556 | for (i = 0; i < nr; i++) { | 2570 | for (i = 0; i < nr; i++) { |
2557 | item = btrfs_item_nr(right, i); | 2571 | item = btrfs_item_nr(right, i); |
@@ -2712,10 +2726,14 @@ out: | |||
2712 | /* | 2726 | /* |
2713 | * push some data in the path leaf to the left, trying to free up at | 2727 | * push some data in the path leaf to the left, trying to free up at |
2714 | * least data_size bytes. returns zero if the push worked, nonzero otherwise | 2728 | * least data_size bytes. returns zero if the push worked, nonzero otherwise |
2729 | * | ||
2730 | * max_slot can put a limit on how far into the leaf we'll push items. The | ||
2731 | * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the | ||
2732 | * items | ||
2715 | */ | 2733 | */ |
2716 | static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root | 2734 | static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root |
2717 | *root, struct btrfs_path *path, int data_size, | 2735 | *root, struct btrfs_path *path, int min_data_size, |
2718 | int empty) | 2736 | int data_size, int empty, u32 max_slot) |
2719 | { | 2737 | { |
2720 | struct extent_buffer *right = path->nodes[0]; | 2738 | struct extent_buffer *right = path->nodes[0]; |
2721 | struct extent_buffer *left; | 2739 | struct extent_buffer *left; |
@@ -2761,8 +2779,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2761 | goto out; | 2779 | goto out; |
2762 | } | 2780 | } |
2763 | 2781 | ||
2764 | return __push_leaf_left(trans, root, path, data_size, | 2782 | return __push_leaf_left(trans, root, path, min_data_size, |
2765 | empty, left, free_space, right_nritems); | 2783 | empty, left, free_space, right_nritems, |
2784 | max_slot); | ||
2766 | out: | 2785 | out: |
2767 | btrfs_tree_unlock(left); | 2786 | btrfs_tree_unlock(left); |
2768 | free_extent_buffer(left); | 2787 | free_extent_buffer(left); |
@@ -2855,6 +2874,64 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans, | |||
2855 | } | 2874 | } |
2856 | 2875 | ||
2857 | /* | 2876 | /* |
2877 | * double splits happen when we need to insert a big item in the middle | ||
2878 | * of a leaf. A double split can leave us with 3 mostly empty leaves: | ||
2879 | * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ] | ||
2880 | * A B C | ||
2881 | * | ||
2882 | * We avoid this by trying to push the items on either side of our target | ||
2883 | * into the adjacent leaves. If all goes well we can avoid the double split | ||
2884 | * completely. | ||
2885 | */ | ||
2886 | static noinline int push_for_double_split(struct btrfs_trans_handle *trans, | ||
2887 | struct btrfs_root *root, | ||
2888 | struct btrfs_path *path, | ||
2889 | int data_size) | ||
2890 | { | ||
2891 | int ret; | ||
2892 | int progress = 0; | ||
2893 | int slot; | ||
2894 | u32 nritems; | ||
2895 | |||
2896 | slot = path->slots[0]; | ||
2897 | |||
2898 | /* | ||
2899 | * try to push all the items after our slot into the | ||
2900 | * right leaf | ||
2901 | */ | ||
2902 | ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot); | ||
2903 | if (ret < 0) | ||
2904 | return ret; | ||
2905 | |||
2906 | if (ret == 0) | ||
2907 | progress++; | ||
2908 | |||
2909 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
2910 | /* | ||
2911 | * our goal is to get our slot at the start or end of a leaf. If | ||
2912 | * we've done so we're done | ||
2913 | */ | ||
2914 | if (path->slots[0] == 0 || path->slots[0] == nritems) | ||
2915 | return 0; | ||
2916 | |||
2917 | if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size) | ||
2918 | return 0; | ||
2919 | |||
2920 | /* try to push all the items before our slot into the next leaf */ | ||
2921 | slot = path->slots[0]; | ||
2922 | ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot); | ||
2923 | if (ret < 0) | ||
2924 | return ret; | ||
2925 | |||
2926 | if (ret == 0) | ||
2927 | progress++; | ||
2928 | |||
2929 | if (progress) | ||
2930 | return 0; | ||
2931 | return 1; | ||
2932 | } | ||
2933 | |||
2934 | /* | ||
2858 | * split the path's leaf in two, making sure there is at least data_size | 2935 | * split the path's leaf in two, making sure there is at least data_size |
2859 | * available for the resulting leaf level of the path. | 2936 | * available for the resulting leaf level of the path. |
2860 | * | 2937 | * |
@@ -2876,6 +2953,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, | |||
2876 | int wret; | 2953 | int wret; |
2877 | int split; | 2954 | int split; |
2878 | int num_doubles = 0; | 2955 | int num_doubles = 0; |
2956 | int tried_avoid_double = 0; | ||
2879 | 2957 | ||
2880 | l = path->nodes[0]; | 2958 | l = path->nodes[0]; |
2881 | slot = path->slots[0]; | 2959 | slot = path->slots[0]; |
@@ -2884,12 +2962,14 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, | |||
2884 | return -EOVERFLOW; | 2962 | return -EOVERFLOW; |
2885 | 2963 | ||
2886 | /* first try to make some room by pushing left and right */ | 2964 | /* first try to make some room by pushing left and right */ |
2887 | if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { | 2965 | if (data_size) { |
2888 | wret = push_leaf_right(trans, root, path, data_size, 0); | 2966 | wret = push_leaf_right(trans, root, path, data_size, |
2967 | data_size, 0, 0); | ||
2889 | if (wret < 0) | 2968 | if (wret < 0) |
2890 | return wret; | 2969 | return wret; |
2891 | if (wret) { | 2970 | if (wret) { |
2892 | wret = push_leaf_left(trans, root, path, data_size, 0); | 2971 | wret = push_leaf_left(trans, root, path, data_size, |
2972 | data_size, 0, (u32)-1); | ||
2893 | if (wret < 0) | 2973 | if (wret < 0) |
2894 | return wret; | 2974 | return wret; |
2895 | } | 2975 | } |
@@ -2923,6 +3003,8 @@ again: | |||
2923 | if (mid != nritems && | 3003 | if (mid != nritems && |
2924 | leaf_space_used(l, mid, nritems - mid) + | 3004 | leaf_space_used(l, mid, nritems - mid) + |
2925 | data_size > BTRFS_LEAF_DATA_SIZE(root)) { | 3005 | data_size > BTRFS_LEAF_DATA_SIZE(root)) { |
3006 | if (data_size && !tried_avoid_double) | ||
3007 | goto push_for_double; | ||
2926 | split = 2; | 3008 | split = 2; |
2927 | } | 3009 | } |
2928 | } | 3010 | } |
@@ -2939,6 +3021,8 @@ again: | |||
2939 | if (mid != nritems && | 3021 | if (mid != nritems && |
2940 | leaf_space_used(l, mid, nritems - mid) + | 3022 | leaf_space_used(l, mid, nritems - mid) + |
2941 | data_size > BTRFS_LEAF_DATA_SIZE(root)) { | 3023 | data_size > BTRFS_LEAF_DATA_SIZE(root)) { |
3024 | if (data_size && !tried_avoid_double) | ||
3025 | goto push_for_double; | ||
2942 | split = 2 ; | 3026 | split = 2 ; |
2943 | } | 3027 | } |
2944 | } | 3028 | } |
@@ -3019,6 +3103,13 @@ again: | |||
3019 | } | 3103 | } |
3020 | 3104 | ||
3021 | return ret; | 3105 | return ret; |
3106 | |||
3107 | push_for_double: | ||
3108 | push_for_double_split(trans, root, path, data_size); | ||
3109 | tried_avoid_double = 1; | ||
3110 | if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size) | ||
3111 | return 0; | ||
3112 | goto again; | ||
3022 | } | 3113 | } |
3023 | 3114 | ||
3024 | static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, | 3115 | static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, |
@@ -3915,13 +4006,15 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
3915 | extent_buffer_get(leaf); | 4006 | extent_buffer_get(leaf); |
3916 | 4007 | ||
3917 | btrfs_set_path_blocking(path); | 4008 | btrfs_set_path_blocking(path); |
3918 | wret = push_leaf_left(trans, root, path, 1, 1); | 4009 | wret = push_leaf_left(trans, root, path, 1, 1, |
4010 | 1, (u32)-1); | ||
3919 | if (wret < 0 && wret != -ENOSPC) | 4011 | if (wret < 0 && wret != -ENOSPC) |
3920 | ret = wret; | 4012 | ret = wret; |
3921 | 4013 | ||
3922 | if (path->nodes[0] == leaf && | 4014 | if (path->nodes[0] == leaf && |
3923 | btrfs_header_nritems(leaf)) { | 4015 | btrfs_header_nritems(leaf)) { |
3924 | wret = push_leaf_right(trans, root, path, 1, 1); | 4016 | wret = push_leaf_right(trans, root, path, 1, |
4017 | 1, 1, 0); | ||
3925 | if (wret < 0 && wret != -ENOSPC) | 4018 | if (wret < 0 && wret != -ENOSPC) |
3926 | ret = wret; | 4019 | ret = wret; |
3927 | } | 4020 | } |
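The new min_slot / max_slot parameters and push_for_double_split() are easier to follow with a toy model. The sketch below is not btrfs code: it uses a hypothetical `struct leaf` holding plain ints to show the idea that pushing the items on either side of the insertion slot into the neighbouring leaves leaves the target slot at the edge of its leaf, so one ordinary split (or none) is enough and the three-way "double split" is avoided.

```c
#include <stdio.h>
#include <string.h>

#define CAP 16

/* Hypothetical in-memory leaf: just an array of keys. */
struct leaf {
	int item[CAP];
	int nr;
};

/* Move items at index >= min_slot into the front of the right sibling. */
static void push_right(struct leaf *l, struct leaf *right, int min_slot)
{
	int moved = l->nr - min_slot;

	memmove(&right->item[moved], &right->item[0], right->nr * sizeof(int));
	memcpy(&right->item[0], &l->item[min_slot], moved * sizeof(int));
	right->nr += moved;
	l->nr = min_slot;
}

/* Move items at index < max_slot onto the end of the left sibling. */
static void push_left(struct leaf *l, struct leaf *left, int max_slot)
{
	memcpy(&left->item[left->nr], &l->item[0], max_slot * sizeof(int));
	left->nr += max_slot;
	memmove(&l->item[0], &l->item[max_slot], (l->nr - max_slot) * sizeof(int));
	l->nr -= max_slot;
}

int main(void)
{
	struct leaf left = { .nr = 0 }, right = { .nr = 0 };
	struct leaf mid = { { 10, 20, 30, 40, 50 }, 5 };
	int slot = 2;	/* a big item must be inserted just before key 30 */

	/* push everything from 'slot' onwards into the right sibling ... */
	push_right(&mid, &right, slot);

	if (slot == 0 || slot == mid.nr) {
		/* slot now sits at the edge of the leaf: no double split needed */
	} else {
		/* ... otherwise also push everything before 'slot' to the left */
		push_left(&mid, &left, slot);
	}

	printf("left=%d mid=%d right=%d, insert slot at edge of mid\n",
	       left.nr, mid.nr, right.nr);
	return 0;
}
```

In the real code either push can fail for lack of space in the sibling; push_for_double_split() counts that as "no progress" and falls back to the normal split path.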
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a4080c21ec55..d74e6af9b53a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2594,7 +2594,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | |||
2594 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, | 2594 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, |
2595 | }; | 2595 | }; |
2596 | struct writeback_control wbc_writepages = { | 2596 | struct writeback_control wbc_writepages = { |
2597 | .bdi = wbc->bdi, | ||
2598 | .sync_mode = wbc->sync_mode, | 2597 | .sync_mode = wbc->sync_mode, |
2599 | .older_than_this = NULL, | 2598 | .older_than_this = NULL, |
2600 | .nr_to_write = 64, | 2599 | .nr_to_write = 64, |
@@ -2628,7 +2627,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, | |||
2628 | .sync_io = mode == WB_SYNC_ALL, | 2627 | .sync_io = mode == WB_SYNC_ALL, |
2629 | }; | 2628 | }; |
2630 | struct writeback_control wbc_writepages = { | 2629 | struct writeback_control wbc_writepages = { |
2631 | .bdi = inode->i_mapping->backing_dev_info, | ||
2632 | .sync_mode = mode, | 2630 | .sync_mode = mode, |
2633 | .older_than_this = NULL, | 2631 | .older_than_this = NULL, |
2634 | .nr_to_write = nr_pages * 2, | 2632 | .nr_to_write = nr_pages * 2, |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4dbaf89b1337..9254b3d58dbe 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1458,7 +1458,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1458 | */ | 1458 | */ |
1459 | 1459 | ||
1460 | /* the destination must be opened for writing */ | 1460 | /* the destination must be opened for writing */ |
1461 | if (!(file->f_mode & FMODE_WRITE)) | 1461 | if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) |
1462 | return -EINVAL; | 1462 | return -EINVAL; |
1463 | 1463 | ||
1464 | ret = mnt_want_write(file->f_path.mnt); | 1464 | ret = mnt_want_write(file->f_path.mnt); |
@@ -1511,7 +1511,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1511 | 1511 | ||
1512 | /* determine range to clone */ | 1512 | /* determine range to clone */ |
1513 | ret = -EINVAL; | 1513 | ret = -EINVAL; |
1514 | if (off >= src->i_size || off + len > src->i_size) | 1514 | if (off + len > src->i_size || off + len < off) |
1515 | goto out_unlock; | 1515 | goto out_unlock; |
1516 | if (len == 0) | 1516 | if (len == 0) |
1517 | olen = len = src->i_size - off; | 1517 | olen = len = src->i_size - off; |
@@ -1578,6 +1578,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1578 | u64 disko = 0, diskl = 0; | 1578 | u64 disko = 0, diskl = 0; |
1579 | u64 datao = 0, datal = 0; | 1579 | u64 datao = 0, datal = 0; |
1580 | u8 comp; | 1580 | u8 comp; |
1581 | u64 endoff; | ||
1581 | 1582 | ||
1582 | size = btrfs_item_size_nr(leaf, slot); | 1583 | size = btrfs_item_size_nr(leaf, slot); |
1583 | read_extent_buffer(leaf, buf, | 1584 | read_extent_buffer(leaf, buf, |
@@ -1712,9 +1713,18 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1712 | btrfs_release_path(root, path); | 1713 | btrfs_release_path(root, path); |
1713 | 1714 | ||
1714 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 1715 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
1715 | if (new_key.offset + datal > inode->i_size) | 1716 | |
1716 | btrfs_i_size_write(inode, | 1717 | /* |
1717 | new_key.offset + datal); | 1718 | * we round up to the block size at eof when |
1719 | * determining which extents to clone above, | ||
1720 | * but shouldn't round up the file size | ||
1721 | */ | ||
1722 | endoff = new_key.offset + datal; | ||
1723 | if (endoff > off+olen) | ||
1724 | endoff = off+olen; | ||
1725 | if (endoff > inode->i_size) | ||
1726 | btrfs_i_size_write(inode, endoff); | ||
1727 | |||
1718 | BTRFS_I(inode)->flags = BTRFS_I(src)->flags; | 1728 | BTRFS_I(inode)->flags = BTRFS_I(src)->flags; |
1719 | ret = btrfs_update_inode(trans, root, inode); | 1729 | ret = btrfs_update_inode(trans, root, inode); |
1720 | BUG_ON(ret); | 1730 | BUG_ON(ret); |
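The tightened clone-range check does two things: it rejects ranges that end past the source file and, via `off + len < off`, ranges whose length makes the u64 sum wrap around, which would otherwise slip past a naive size comparison. A small standalone illustration of just that arithmetic (not the ioctl code itself):

```c
#include <stdint.h>
#include <stdio.h>

/* Returns 1 if [off, off+len) is not a valid range for a file of i_size bytes. */
static int range_invalid(uint64_t off, uint64_t len, uint64_t i_size)
{
	return off + len > i_size || off + len < off;	/* second test catches wraparound */
}

int main(void)
{
	uint64_t i_size = 4096;

	printf("%d\n", range_invalid(0, 4096, i_size));		/* 0: exactly the file */
	printf("%d\n", range_invalid(4096, 1, i_size));			/* 1: past EOF */
	printf("%d\n", range_invalid(8, UINT64_MAX - 4, i_size));	/* 1: off+len wraps to 3 */
	return 0;
}
```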
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index 83d4d2785ffe..6d44053ecff1 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -493,7 +493,7 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result, | |||
493 | return -EAGAIN; | 493 | return -EAGAIN; |
494 | } | 494 | } |
495 | 495 | ||
496 | op = le32_to_cpu(head->op); | 496 | op = le16_to_cpu(head->op); |
497 | result = le32_to_cpu(head->result); | 497 | result = le32_to_cpu(head->result); |
498 | dout("handle_reply op %d result %d\n", op, result); | 498 | dout("handle_reply op %d result %d\n", op, result); |
499 | switch (op) { | 499 | switch (op) { |
@@ -613,6 +613,9 @@ static void ceph_x_destroy(struct ceph_auth_client *ac) | |||
613 | remove_ticket_handler(ac, th); | 613 | remove_ticket_handler(ac, th); |
614 | } | 614 | } |
615 | 615 | ||
616 | if (xi->auth_authorizer.buf) | ||
617 | ceph_buffer_put(xi->auth_authorizer.buf); | ||
618 | |||
616 | kfree(ac->private); | 619 | kfree(ac->private); |
617 | ac->private = NULL; | 620 | ac->private = NULL; |
618 | } | 621 | } |
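The auth fix reads head->op with le16_to_cpu() because the field is 16 bits on the wire; decoding it as a 32-bit value pulls the two bytes that follow it into the result. A host-side sketch of that failure mode, using a hypothetical packed header and assuming a little-endian host (this is not the ceph wire format verbatim):

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct reply_head {		/* toy stand-in for a wire header */
	uint16_t op;		/* little-endian 16-bit op code */
	uint32_t result;
} __attribute__((packed));

int main(void)
{
	/* op = 0x0100, result = 1, both little-endian on the wire */
	unsigned char wire[6] = { 0x00, 0x01, 0x01, 0x00, 0x00, 0x00 };
	struct reply_head h;
	uint32_t wrong;

	memcpy(&h, wire, sizeof(h));
	memcpy(&wrong, wire, 4);	/* what a 32-bit read of 'op' effectively does */

	printf("op (16-bit read) = 0x%x\n", (unsigned)h.op);	/* 0x100 */
	printf("op (32-bit read) = 0x%x\n", (unsigned)wrong);	/* 0x10100: result bytes bleed in */
	return 0;
}
```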
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 619b61655ee5..74144d6389f0 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -244,8 +244,14 @@ static struct ceph_cap *get_cap(struct ceph_cap_reservation *ctx) | |||
244 | struct ceph_cap *cap = NULL; | 244 | struct ceph_cap *cap = NULL; |
245 | 245 | ||
246 | /* temporary, until we do something about cap import/export */ | 246 | /* temporary, until we do something about cap import/export */ |
247 | if (!ctx) | 247 | if (!ctx) { |
248 | return kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); | 248 | cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); |
249 | if (cap) { | ||
250 | caps_use_count++; | ||
251 | caps_total_count++; | ||
252 | } | ||
253 | return cap; | ||
254 | } | ||
249 | 255 | ||
250 | spin_lock(&caps_list_lock); | 256 | spin_lock(&caps_list_lock); |
251 | dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n", | 257 | dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n", |
@@ -2886,18 +2892,19 @@ int ceph_encode_inode_release(void **p, struct inode *inode, | |||
2886 | struct ceph_inode_info *ci = ceph_inode(inode); | 2892 | struct ceph_inode_info *ci = ceph_inode(inode); |
2887 | struct ceph_cap *cap; | 2893 | struct ceph_cap *cap; |
2888 | struct ceph_mds_request_release *rel = *p; | 2894 | struct ceph_mds_request_release *rel = *p; |
2895 | int used, dirty; | ||
2889 | int ret = 0; | 2896 | int ret = 0; |
2890 | int used = 0; | ||
2891 | 2897 | ||
2892 | spin_lock(&inode->i_lock); | 2898 | spin_lock(&inode->i_lock); |
2893 | used = __ceph_caps_used(ci); | 2899 | used = __ceph_caps_used(ci); |
2900 | dirty = __ceph_caps_dirty(ci); | ||
2894 | 2901 | ||
2895 | dout("encode_inode_release %p mds%d used %s drop %s unless %s\n", inode, | 2902 | dout("encode_inode_release %p mds%d used|dirty %s drop %s unless %s\n", |
2896 | mds, ceph_cap_string(used), ceph_cap_string(drop), | 2903 | inode, mds, ceph_cap_string(used|dirty), ceph_cap_string(drop), |
2897 | ceph_cap_string(unless)); | 2904 | ceph_cap_string(unless)); |
2898 | 2905 | ||
2899 | /* only drop unused caps */ | 2906 | /* only drop unused, clean caps */ |
2900 | drop &= ~used; | 2907 | drop &= ~(used | dirty); |
2901 | 2908 | ||
2902 | cap = __get_cap_for_mds(ci, mds); | 2909 | cap = __get_cap_for_mds(ci, mds); |
2903 | if (cap && __cap_is_valid(cap)) { | 2910 | if (cap && __cap_is_valid(cap)) { |
diff --git a/fs/ceph/crush/mapper.c b/fs/ceph/crush/mapper.c
index 9ba54efb6543..a4eec133258e 100644
--- a/fs/ceph/crush/mapper.c
+++ b/fs/ceph/crush/mapper.c
@@ -238,7 +238,7 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket, | |||
238 | 238 | ||
239 | static int crush_bucket_choose(struct crush_bucket *in, int x, int r) | 239 | static int crush_bucket_choose(struct crush_bucket *in, int x, int r) |
240 | { | 240 | { |
241 | dprintk("choose %d x=%d r=%d\n", in->id, x, r); | 241 | dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r); |
242 | switch (in->alg) { | 242 | switch (in->alg) { |
243 | case CRUSH_BUCKET_UNIFORM: | 243 | case CRUSH_BUCKET_UNIFORM: |
244 | return bucket_uniform_choose((struct crush_bucket_uniform *)in, | 244 | return bucket_uniform_choose((struct crush_bucket_uniform *)in, |
@@ -264,7 +264,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r) | |||
264 | */ | 264 | */ |
265 | static int is_out(struct crush_map *map, __u32 *weight, int item, int x) | 265 | static int is_out(struct crush_map *map, __u32 *weight, int item, int x) |
266 | { | 266 | { |
267 | if (weight[item] >= 0x1000) | 267 | if (weight[item] >= 0x10000) |
268 | return 0; | 268 | return 0; |
269 | if (weight[item] == 0) | 269 | if (weight[item] == 0) |
270 | return 1; | 270 | return 1; |
@@ -305,7 +305,9 @@ static int crush_choose(struct crush_map *map, | |||
305 | int itemtype; | 305 | int itemtype; |
306 | int collide, reject; | 306 | int collide, reject; |
307 | const int orig_tries = 5; /* attempts before we fall back to search */ | 307 | const int orig_tries = 5; /* attempts before we fall back to search */ |
308 | dprintk("choose bucket %d x %d outpos %d\n", bucket->id, x, outpos); | 308 | |
309 | dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "", | ||
310 | bucket->id, x, outpos, numrep); | ||
309 | 311 | ||
310 | for (rep = outpos; rep < numrep; rep++) { | 312 | for (rep = outpos; rep < numrep; rep++) { |
311 | /* keep trying until we get a non-out, non-colliding item */ | 313 | /* keep trying until we get a non-out, non-colliding item */ |
@@ -366,6 +368,7 @@ static int crush_choose(struct crush_map *map, | |||
366 | BUG_ON(item >= 0 || | 368 | BUG_ON(item >= 0 || |
367 | (-1-item) >= map->max_buckets); | 369 | (-1-item) >= map->max_buckets); |
368 | in = map->buckets[-1-item]; | 370 | in = map->buckets[-1-item]; |
371 | retry_bucket = 1; | ||
369 | continue; | 372 | continue; |
370 | } | 373 | } |
371 | 374 | ||
@@ -377,15 +380,25 @@ static int crush_choose(struct crush_map *map, | |||
377 | } | 380 | } |
378 | } | 381 | } |
379 | 382 | ||
380 | if (recurse_to_leaf && | 383 | reject = 0; |
381 | item < 0 && | 384 | if (recurse_to_leaf) { |
382 | crush_choose(map, map->buckets[-1-item], | 385 | if (item < 0) { |
383 | weight, | 386 | if (crush_choose(map, |
384 | x, outpos+1, 0, | 387 | map->buckets[-1-item], |
385 | out2, outpos, | 388 | weight, |
386 | firstn, 0, NULL) <= outpos) { | 389 | x, outpos+1, 0, |
387 | reject = 1; | 390 | out2, outpos, |
388 | } else { | 391 | firstn, 0, |
392 | NULL) <= outpos) | ||
393 | /* didn't get leaf */ | ||
394 | reject = 1; | ||
395 | } else { | ||
396 | /* we already have a leaf! */ | ||
397 | out2[outpos] = item; | ||
398 | } | ||
399 | } | ||
400 | |||
401 | if (!reject) { | ||
389 | /* out? */ | 402 | /* out? */ |
390 | if (itemtype == 0) | 403 | if (itemtype == 0) |
391 | reject = is_out(map, weight, | 404 | reject = is_out(map, weight, |
@@ -424,12 +437,12 @@ reject: | |||
424 | continue; | 437 | continue; |
425 | } | 438 | } |
426 | 439 | ||
427 | dprintk("choose got %d\n", item); | 440 | dprintk("CHOOSE got %d\n", item); |
428 | out[outpos] = item; | 441 | out[outpos] = item; |
429 | outpos++; | 442 | outpos++; |
430 | } | 443 | } |
431 | 444 | ||
432 | dprintk("choose returns %d\n", outpos); | 445 | dprintk("CHOOSE returns %d\n", outpos); |
433 | return outpos; | 446 | return outpos; |
434 | } | 447 | } |
435 | 448 | ||
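CRUSH device weights are 16.16 fixed point, so full weight is 0x10000; the old 0x1000 threshold treated anything above 1/16 of full weight as permanently "in" and never applied the probabilistic reweighting. A compact sketch of the corrected decision, assuming a uniform 16-bit hash value (a toy, not the kernel's crush_hash32_2() path):

```c
#include <stdint.h>
#include <stdio.h>

#define FULL_WEIGHT 0x10000u	/* 16.16 fixed point: 1.0 */

/* Toy version of the is_out() decision, not the kernel implementation. */
static int is_out(uint32_t weight, uint32_t hash16)
{
	if (weight >= FULL_WEIGHT)	/* full-weight devices are always in */
		return 0;
	if (weight == 0)		/* zero-weight devices are always out */
		return 1;
	/* partial weight: stay in with probability weight / 0x10000 */
	return hash16 >= weight;
}

int main(void)
{
	printf("weight 1.0,  hash 0.9 -> out=%d\n", is_out(0x10000, 0xE666));
	printf("weight 0.25, hash 0.9 -> out=%d\n", is_out(0x4000, 0xE666));
	printf("weight 0.25, hash 0.1 -> out=%d\n", is_out(0x4000, 0x1999));
	return 0;
}
```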
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 3be33fb066cc..f2f5332ddbba 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -261,7 +261,7 @@ static int osdc_show(struct seq_file *s, void *pp) | |||
261 | 261 | ||
262 | static int caps_show(struct seq_file *s, void *p) | 262 | static int caps_show(struct seq_file *s, void *p) |
263 | { | 263 | { |
264 | struct ceph_client *client = p; | 264 | struct ceph_client *client = s->private; |
265 | int total, avail, used, reserved, min; | 265 | int total, avail, used, reserved, min; |
266 | 266 | ||
267 | ceph_reservation_status(client, &total, &avail, &used, &reserved, &min); | 267 | ceph_reservation_status(client, &total, &avail, &used, &reserved, &min); |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index ab47f46ca282..8f9b9fe8ef9f 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -854,8 +854,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, | |||
854 | d_drop(dn); | 854 | d_drop(dn); |
855 | realdn = d_materialise_unique(dn, in); | 855 | realdn = d_materialise_unique(dn, in); |
856 | if (IS_ERR(realdn)) { | 856 | if (IS_ERR(realdn)) { |
857 | pr_err("splice_dentry error %p inode %p ino %llx.%llx\n", | 857 | pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n", |
858 | dn, in, ceph_vinop(in)); | 858 | PTR_ERR(realdn), dn, in, ceph_vinop(in)); |
859 | if (prehash) | 859 | if (prehash) |
860 | *prehash = false; /* don't rehash on error */ | 860 | *prehash = false; /* don't rehash on error */ |
861 | dn = realdn; /* note realdn contains the error */ | 861 | dn = realdn; /* note realdn contains the error */ |
@@ -1234,18 +1234,23 @@ retry_lookup: | |||
1234 | goto out; | 1234 | goto out; |
1235 | } | 1235 | } |
1236 | dn = splice_dentry(dn, in, NULL); | 1236 | dn = splice_dentry(dn, in, NULL); |
1237 | if (IS_ERR(dn)) | ||
1238 | dn = NULL; | ||
1237 | } | 1239 | } |
1238 | 1240 | ||
1239 | if (fill_inode(in, &rinfo->dir_in[i], NULL, session, | 1241 | if (fill_inode(in, &rinfo->dir_in[i], NULL, session, |
1240 | req->r_request_started, -1, | 1242 | req->r_request_started, -1, |
1241 | &req->r_caps_reservation) < 0) { | 1243 | &req->r_caps_reservation) < 0) { |
1242 | pr_err("fill_inode badness on %p\n", in); | 1244 | pr_err("fill_inode badness on %p\n", in); |
1243 | dput(dn); | 1245 | goto next_item; |
1244 | continue; | ||
1245 | } | 1246 | } |
1246 | update_dentry_lease(dn, rinfo->dir_dlease[i], | 1247 | if (dn) |
1247 | req->r_session, req->r_request_started); | 1248 | update_dentry_lease(dn, rinfo->dir_dlease[i], |
1248 | dput(dn); | 1249 | req->r_session, |
1250 | req->r_request_started); | ||
1251 | next_item: | ||
1252 | if (dn) | ||
1253 | dput(dn); | ||
1249 | } | 1254 | } |
1250 | req->r_did_prepopulate = true; | 1255 | req->r_did_prepopulate = true; |
1251 | 1256 | ||
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 1766947fc07a..416c08d315db 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1514,6 +1514,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, | |||
1514 | ceph_encode_filepath(&p, end, ino1, path1); | 1514 | ceph_encode_filepath(&p, end, ino1, path1); |
1515 | ceph_encode_filepath(&p, end, ino2, path2); | 1515 | ceph_encode_filepath(&p, end, ino2, path2); |
1516 | 1516 | ||
1517 | /* make note of release offset, in case we need to replay */ | ||
1518 | req->r_request_release_offset = p - msg->front.iov_base; | ||
1519 | |||
1517 | /* cap releases */ | 1520 | /* cap releases */ |
1518 | releases = 0; | 1521 | releases = 0; |
1519 | if (req->r_inode_drop) | 1522 | if (req->r_inode_drop) |
@@ -1580,6 +1583,32 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, | |||
1580 | dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req, | 1583 | dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req, |
1581 | req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts); | 1584 | req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts); |
1582 | 1585 | ||
1586 | if (req->r_got_unsafe) { | ||
1587 | /* | ||
1588 | * Replay. Do not regenerate message (and rebuild | ||
1589 | * paths, etc.); just use the original message. | ||
1590 | * Rebuilding paths will break for renames because | ||
1591 | * d_move mangles the src name. | ||
1592 | */ | ||
1593 | msg = req->r_request; | ||
1594 | rhead = msg->front.iov_base; | ||
1595 | |||
1596 | flags = le32_to_cpu(rhead->flags); | ||
1597 | flags |= CEPH_MDS_FLAG_REPLAY; | ||
1598 | rhead->flags = cpu_to_le32(flags); | ||
1599 | |||
1600 | if (req->r_target_inode) | ||
1601 | rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode)); | ||
1602 | |||
1603 | rhead->num_retry = req->r_attempts - 1; | ||
1604 | |||
1605 | /* remove cap/dentry releases from message */ | ||
1606 | rhead->num_releases = 0; | ||
1607 | msg->hdr.front_len = cpu_to_le32(req->r_request_release_offset); | ||
1608 | msg->front.iov_len = req->r_request_release_offset; | ||
1609 | return 0; | ||
1610 | } | ||
1611 | |||
1583 | if (req->r_request) { | 1612 | if (req->r_request) { |
1584 | ceph_msg_put(req->r_request); | 1613 | ceph_msg_put(req->r_request); |
1585 | req->r_request = NULL; | 1614 | req->r_request = NULL; |
@@ -1601,13 +1630,9 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, | |||
1601 | rhead->flags = cpu_to_le32(flags); | 1630 | rhead->flags = cpu_to_le32(flags); |
1602 | rhead->num_fwd = req->r_num_fwd; | 1631 | rhead->num_fwd = req->r_num_fwd; |
1603 | rhead->num_retry = req->r_attempts - 1; | 1632 | rhead->num_retry = req->r_attempts - 1; |
1633 | rhead->ino = 0; | ||
1604 | 1634 | ||
1605 | dout(" r_locked_dir = %p\n", req->r_locked_dir); | 1635 | dout(" r_locked_dir = %p\n", req->r_locked_dir); |
1606 | |||
1607 | if (req->r_target_inode && req->r_got_unsafe) | ||
1608 | rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode)); | ||
1609 | else | ||
1610 | rhead->ino = 0; | ||
1611 | return 0; | 1636 | return 0; |
1612 | } | 1637 | } |
1613 | 1638 | ||
@@ -2783,6 +2808,12 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) | |||
2783 | drop_leases(mdsc); | 2808 | drop_leases(mdsc); |
2784 | ceph_flush_dirty_caps(mdsc); | 2809 | ceph_flush_dirty_caps(mdsc); |
2785 | wait_requests(mdsc); | 2810 | wait_requests(mdsc); |
2811 | |||
2812 | /* | ||
2813 | * wait for reply handlers to drop their request refs and | ||
2814 | * their inode/dcache refs | ||
2815 | */ | ||
2816 | ceph_msgr_flush(); | ||
2786 | } | 2817 | } |
2787 | 2818 | ||
2788 | /* | 2819 | /* |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index b292fa42a66d..952410c60d09 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -188,6 +188,7 @@ struct ceph_mds_request { | |||
188 | int r_old_inode_drop, r_old_inode_unless; | 188 | int r_old_inode_drop, r_old_inode_unless; |
189 | 189 | ||
190 | struct ceph_msg *r_request; /* original request */ | 190 | struct ceph_msg *r_request; /* original request */ |
191 | int r_request_release_offset; | ||
191 | struct ceph_msg *r_reply; | 192 | struct ceph_msg *r_reply; |
192 | struct ceph_mds_reply_info_parsed r_reply_info; | 193 | struct ceph_mds_reply_info_parsed r_reply_info; |
193 | int r_err; | 194 | int r_err; |
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 64b8b1f7863d..15167b2daa55 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -43,7 +43,8 @@ static void ceph_fault(struct ceph_connection *con); | |||
43 | * nicely render a sockaddr as a string. | 43 | * nicely render a sockaddr as a string. |
44 | */ | 44 | */ |
45 | #define MAX_ADDR_STR 20 | 45 | #define MAX_ADDR_STR 20 |
46 | static char addr_str[MAX_ADDR_STR][40]; | 46 | #define MAX_ADDR_STR_LEN 60 |
47 | static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN]; | ||
47 | static DEFINE_SPINLOCK(addr_str_lock); | 48 | static DEFINE_SPINLOCK(addr_str_lock); |
48 | static int last_addr_str; | 49 | static int last_addr_str; |
49 | 50 | ||
@@ -52,7 +53,6 @@ const char *pr_addr(const struct sockaddr_storage *ss) | |||
52 | int i; | 53 | int i; |
53 | char *s; | 54 | char *s; |
54 | struct sockaddr_in *in4 = (void *)ss; | 55 | struct sockaddr_in *in4 = (void *)ss; |
55 | unsigned char *quad = (void *)&in4->sin_addr.s_addr; | ||
56 | struct sockaddr_in6 *in6 = (void *)ss; | 56 | struct sockaddr_in6 *in6 = (void *)ss; |
57 | 57 | ||
58 | spin_lock(&addr_str_lock); | 58 | spin_lock(&addr_str_lock); |
@@ -64,25 +64,13 @@ const char *pr_addr(const struct sockaddr_storage *ss) | |||
64 | 64 | ||
65 | switch (ss->ss_family) { | 65 | switch (ss->ss_family) { |
66 | case AF_INET: | 66 | case AF_INET: |
67 | sprintf(s, "%u.%u.%u.%u:%u", | 67 | snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%u", &in4->sin_addr, |
68 | (unsigned int)quad[0], | 68 | (unsigned int)ntohs(in4->sin_port)); |
69 | (unsigned int)quad[1], | ||
70 | (unsigned int)quad[2], | ||
71 | (unsigned int)quad[3], | ||
72 | (unsigned int)ntohs(in4->sin_port)); | ||
73 | break; | 69 | break; |
74 | 70 | ||
75 | case AF_INET6: | 71 | case AF_INET6: |
76 | sprintf(s, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%u", | 72 | snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%u", &in6->sin6_addr, |
77 | in6->sin6_addr.s6_addr16[0], | 73 | (unsigned int)ntohs(in6->sin6_port)); |
78 | in6->sin6_addr.s6_addr16[1], | ||
79 | in6->sin6_addr.s6_addr16[2], | ||
80 | in6->sin6_addr.s6_addr16[3], | ||
81 | in6->sin6_addr.s6_addr16[4], | ||
82 | in6->sin6_addr.s6_addr16[5], | ||
83 | in6->sin6_addr.s6_addr16[6], | ||
84 | in6->sin6_addr.s6_addr16[7], | ||
85 | (unsigned int)ntohs(in6->sin6_port)); | ||
86 | break; | 74 | break; |
87 | 75 | ||
88 | default: | 76 | default: |
@@ -215,12 +203,13 @@ static void set_sock_callbacks(struct socket *sock, | |||
215 | */ | 203 | */ |
216 | static struct socket *ceph_tcp_connect(struct ceph_connection *con) | 204 | static struct socket *ceph_tcp_connect(struct ceph_connection *con) |
217 | { | 205 | { |
218 | struct sockaddr *paddr = (struct sockaddr *)&con->peer_addr.in_addr; | 206 | struct sockaddr_storage *paddr = &con->peer_addr.in_addr; |
219 | struct socket *sock; | 207 | struct socket *sock; |
220 | int ret; | 208 | int ret; |
221 | 209 | ||
222 | BUG_ON(con->sock); | 210 | BUG_ON(con->sock); |
223 | ret = sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); | 211 | ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM, |
212 | IPPROTO_TCP, &sock); | ||
224 | if (ret) | 213 | if (ret) |
225 | return ERR_PTR(ret); | 214 | return ERR_PTR(ret); |
226 | con->sock = sock; | 215 | con->sock = sock; |
@@ -234,7 +223,8 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con) | |||
234 | 223 | ||
235 | dout("connect %s\n", pr_addr(&con->peer_addr.in_addr)); | 224 | dout("connect %s\n", pr_addr(&con->peer_addr.in_addr)); |
236 | 225 | ||
237 | ret = sock->ops->connect(sock, paddr, sizeof(*paddr), O_NONBLOCK); | 226 | ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr), |
227 | O_NONBLOCK); | ||
238 | if (ret == -EINPROGRESS) { | 228 | if (ret == -EINPROGRESS) { |
239 | dout("connect %s EINPROGRESS sk_state = %u\n", | 229 | dout("connect %s EINPROGRESS sk_state = %u\n", |
240 | pr_addr(&con->peer_addr.in_addr), | 230 | pr_addr(&con->peer_addr.in_addr), |
@@ -657,7 +647,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr, | |||
657 | dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, | 647 | dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, |
658 | con->connect_seq, global_seq, proto); | 648 | con->connect_seq, global_seq, proto); |
659 | 649 | ||
660 | con->out_connect.features = CEPH_FEATURE_SUPPORTED_CLIENT; | 650 | con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED_CLIENT); |
661 | con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); | 651 | con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); |
662 | con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); | 652 | con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); |
663 | con->out_connect.global_seq = cpu_to_le32(global_seq); | 653 | con->out_connect.global_seq = cpu_to_le32(global_seq); |
@@ -1009,19 +999,32 @@ int ceph_parse_ips(const char *c, const char *end, | |||
1009 | struct sockaddr_in *in4 = (void *)ss; | 999 | struct sockaddr_in *in4 = (void *)ss; |
1010 | struct sockaddr_in6 *in6 = (void *)ss; | 1000 | struct sockaddr_in6 *in6 = (void *)ss; |
1011 | int port; | 1001 | int port; |
1002 | char delim = ','; | ||
1003 | |||
1004 | if (*p == '[') { | ||
1005 | delim = ']'; | ||
1006 | p++; | ||
1007 | } | ||
1012 | 1008 | ||
1013 | memset(ss, 0, sizeof(*ss)); | 1009 | memset(ss, 0, sizeof(*ss)); |
1014 | if (in4_pton(p, end - p, (u8 *)&in4->sin_addr.s_addr, | 1010 | if (in4_pton(p, end - p, (u8 *)&in4->sin_addr.s_addr, |
1015 | ',', &ipend)) { | 1011 | delim, &ipend)) |
1016 | ss->ss_family = AF_INET; | 1012 | ss->ss_family = AF_INET; |
1017 | } else if (in6_pton(p, end - p, (u8 *)&in6->sin6_addr.s6_addr, | 1013 | else if (in6_pton(p, end - p, (u8 *)&in6->sin6_addr.s6_addr, |
1018 | ',', &ipend)) { | 1014 | delim, &ipend)) |
1019 | ss->ss_family = AF_INET6; | 1015 | ss->ss_family = AF_INET6; |
1020 | } else { | 1016 | else |
1021 | goto bad; | 1017 | goto bad; |
1022 | } | ||
1023 | p = ipend; | 1018 | p = ipend; |
1024 | 1019 | ||
1020 | if (delim == ']') { | ||
1021 | if (*p != ']') { | ||
1022 | dout("missing matching ']'\n"); | ||
1023 | goto bad; | ||
1024 | } | ||
1025 | p++; | ||
1026 | } | ||
1027 | |||
1025 | /* port? */ | 1028 | /* port? */ |
1026 | if (p < end && *p == ':') { | 1029 | if (p < end && *p == ':') { |
1027 | port = 0; | 1030 | port = 0; |
@@ -1055,7 +1058,7 @@ int ceph_parse_ips(const char *c, const char *end, | |||
1055 | return 0; | 1058 | return 0; |
1056 | 1059 | ||
1057 | bad: | 1060 | bad: |
1058 | pr_err("parse_ips bad ip '%s'\n", c); | 1061 | pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c); |
1059 | return -EINVAL; | 1062 | return -EINVAL; |
1060 | } | 1063 | } |
1061 | 1064 | ||
@@ -1396,10 +1399,12 @@ static int read_partial_message(struct ceph_connection *con) | |||
1396 | if (!con->in_msg) { | 1399 | if (!con->in_msg) { |
1397 | dout("got hdr type %d front %d data %d\n", con->in_hdr.type, | 1400 | dout("got hdr type %d front %d data %d\n", con->in_hdr.type, |
1398 | con->in_hdr.front_len, con->in_hdr.data_len); | 1401 | con->in_hdr.front_len, con->in_hdr.data_len); |
1402 | skip = 0; | ||
1399 | con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip); | 1403 | con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip); |
1400 | if (skip) { | 1404 | if (skip) { |
1401 | /* skip this message */ | 1405 | /* skip this message */ |
1402 | dout("alloc_msg said skip message\n"); | 1406 | dout("alloc_msg said skip message\n"); |
1407 | BUG_ON(con->in_msg); | ||
1403 | con->in_base_pos = -front_len - middle_len - data_len - | 1408 | con->in_base_pos = -front_len - middle_len - data_len - |
1404 | sizeof(m->footer); | 1409 | sizeof(m->footer); |
1405 | con->in_tag = CEPH_MSGR_TAG_READY; | 1410 | con->in_tag = CEPH_MSGR_TAG_READY; |
@@ -2013,20 +2018,20 @@ void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg) | |||
2013 | { | 2018 | { |
2014 | mutex_lock(&con->mutex); | 2019 | mutex_lock(&con->mutex); |
2015 | if (!list_empty(&msg->list_head)) { | 2020 | if (!list_empty(&msg->list_head)) { |
2016 | dout("con_revoke %p msg %p\n", con, msg); | 2021 | dout("con_revoke %p msg %p - was on queue\n", con, msg); |
2017 | list_del_init(&msg->list_head); | 2022 | list_del_init(&msg->list_head); |
2018 | ceph_msg_put(msg); | 2023 | ceph_msg_put(msg); |
2019 | msg->hdr.seq = 0; | 2024 | msg->hdr.seq = 0; |
2020 | if (con->out_msg == msg) { | 2025 | } |
2021 | ceph_msg_put(con->out_msg); | 2026 | if (con->out_msg == msg) { |
2022 | con->out_msg = NULL; | 2027 | dout("con_revoke %p msg %p - was sending\n", con, msg); |
2023 | } | 2028 | con->out_msg = NULL; |
2024 | if (con->out_kvec_is_msg) { | 2029 | if (con->out_kvec_is_msg) { |
2025 | con->out_skip = con->out_kvec_bytes; | 2030 | con->out_skip = con->out_kvec_bytes; |
2026 | con->out_kvec_is_msg = false; | 2031 | con->out_kvec_is_msg = false; |
2027 | } | 2032 | } |
2028 | } else { | 2033 | ceph_msg_put(msg); |
2029 | dout("con_revoke %p msg %p - not queued (sent?)\n", con, msg); | 2034 | msg->hdr.seq = 0; |
2030 | } | 2035 | } |
2031 | mutex_unlock(&con->mutex); | 2036 | mutex_unlock(&con->mutex); |
2032 | } | 2037 | } |
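With the messenger changes, ceph_parse_ips() accepts the conventional bracketed IPv6 form ("[::1]:6789") alongside the plain IPv4 form ("1.2.3.4:6789"), switching the address delimiter to ']' when the string starts with '['. A userspace analogue built on inet_pton, purely illustrative (the kernel parser uses in4_pton/in6_pton and different error handling):

```c
#include <arpa/inet.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Parse "host:port" or "[host]:port" into family/addr/port. Returns 0 on success. */
static int parse_addr(const char *s, int *family, unsigned char addr[16], int *port)
{
	char host[64];
	const char *colon;

	if (*s == '[') {			/* bracketed IPv6 */
		const char *close = strchr(s, ']');
		if (!close || close[1] != ':')
			return -1;
		snprintf(host, sizeof(host), "%.*s", (int)(close - s - 1), s + 1);
		colon = close + 1;
	} else {
		colon = strrchr(s, ':');
		if (!colon)
			return -1;
		snprintf(host, sizeof(host), "%.*s", (int)(colon - s), s);
	}
	*port = atoi(colon + 1);

	if (inet_pton(AF_INET, host, addr) == 1) {
		*family = AF_INET;
		return 0;
	}
	if (inet_pton(AF_INET6, host, addr) == 1) {
		*family = AF_INET6;
		return 0;
	}
	return -1;
}

int main(void)
{
	unsigned char a[16];
	int fam, port;

	printf("%d\n", parse_addr("1.2.3.4:6789", &fam, a, &port));	/* 0 */
	printf("%d\n", parse_addr("[::1]:6789", &fam, a, &port));	/* 0 */
	return 0;
}
```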
diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c
index 07a539906e67..cc115eafae11 100644
--- a/fs/ceph/mon_client.c
+++ b/fs/ceph/mon_client.c
@@ -725,7 +725,8 @@ static void handle_auth_reply(struct ceph_mon_client *monc, | |||
725 | dout("authenticated, starting session\n"); | 725 | dout("authenticated, starting session\n"); |
726 | 726 | ||
727 | monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT; | 727 | monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT; |
728 | monc->client->msgr->inst.name.num = monc->auth->global_id; | 728 | monc->client->msgr->inst.name.num = |
729 | cpu_to_le64(monc->auth->global_id); | ||
729 | 730 | ||
730 | __send_subscribe(monc); | 731 | __send_subscribe(monc); |
731 | __resend_generic_request(monc); | 732 | __resend_generic_request(monc); |
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index d25b4add85b4..92b7251a53f1 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -1344,7 +1344,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) | |||
1344 | int type = le16_to_cpu(msg->hdr.type); | 1344 | int type = le16_to_cpu(msg->hdr.type); |
1345 | 1345 | ||
1346 | if (!osd) | 1346 | if (!osd) |
1347 | return; | 1347 | goto out; |
1348 | osdc = osd->o_osdc; | 1348 | osdc = osd->o_osdc; |
1349 | 1349 | ||
1350 | switch (type) { | 1350 | switch (type) { |
@@ -1359,6 +1359,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) | |||
1359 | pr_err("received unknown message type %d %s\n", type, | 1359 | pr_err("received unknown message type %d %s\n", type, |
1360 | ceph_msg_type_name(type)); | 1360 | ceph_msg_type_name(type)); |
1361 | } | 1361 | } |
1362 | out: | ||
1362 | ceph_msg_put(msg); | 1363 | ceph_msg_put(msg); |
1363 | } | 1364 | } |
1364 | 1365 | ||
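The osd_client change replaces an early return with goto out so the ceph_msg_put() at the bottom of dispatch() always runs; the early return leaked one message reference whenever the connection had no osd attached. A generic sketch of that single-exit refcount pattern (hypothetical types, not the ceph ones):

```c
#include <stdio.h>

struct msg { int refs; };

static void msg_put(struct msg *m) { m->refs--; }

/* Every path through dispatch must reach the put at 'out'. */
static void dispatch(struct msg *m, int have_osd)
{
	if (!have_osd)
		goto out;	/* an early 'return' here would leak the reference */

	/* ... handle the message ... */

out:
	msg_put(m);
}

int main(void)
{
	struct msg m = { .refs = 1 };
	dispatch(&m, 0);
	printf("refs after dispatch: %d\n", m.refs);	/* 0: no leak */
	return 0;
}
```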
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index ddc656fb5c05..277f8b339577 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -568,6 +568,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) | |||
568 | if (ev > CEPH_PG_POOL_VERSION) { | 568 | if (ev > CEPH_PG_POOL_VERSION) { |
569 | pr_warning("got unknown v %d > %d of ceph_pg_pool\n", | 569 | pr_warning("got unknown v %d > %d of ceph_pg_pool\n", |
570 | ev, CEPH_PG_POOL_VERSION); | 570 | ev, CEPH_PG_POOL_VERSION); |
571 | kfree(pi); | ||
571 | goto bad; | 572 | goto bad; |
572 | } | 573 | } |
573 | __decode_pool(p, pi); | 574 | __decode_pool(p, pi); |
@@ -707,6 +708,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
707 | newcrush = crush_decode(*p, min(*p+len, end)); | 708 | newcrush = crush_decode(*p, min(*p+len, end)); |
708 | if (IS_ERR(newcrush)) | 709 | if (IS_ERR(newcrush)) |
709 | return ERR_CAST(newcrush); | 710 | return ERR_CAST(newcrush); |
711 | *p += len; | ||
710 | } | 712 | } |
711 | 713 | ||
712 | /* new flags? */ | 714 | /* new flags? */ |
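Both osdmap fixes are decoder bookkeeping: kfree() the partially built pool on the unknown-version error path, and advance the read cursor (*p += len) after consuming the embedded crush blob so later fields are decoded from the correct offset. A minimal cursor-style decoder sketch, with its simplifying assumptions noted in the comments (it is not the ceph decoder):

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Decode a length-prefixed blob, advancing *p past both length and payload.
 * The length is read in host byte order here purely to keep the toy short. */
static int decode_blob(const uint8_t **p, const uint8_t *end, uint32_t *len_out)
{
	uint32_t len;

	if (end - *p < 4)
		return -1;
	memcpy(&len, *p, 4);
	*p += 4;

	if ((uint32_t)(end - *p) < len)
		return -1;
	/* ... consume the payload here ... */
	*p += len;		/* forgetting this desyncs every later field */
	*len_out = len;
	return 0;
}

int main(void)
{
	uint8_t buf[] = { 3, 0, 0, 0, 'a', 'b', 'c',
			  5, 0, 0, 0, 'h', 'e', 'l', 'l', 'o' };
	const uint8_t *p = buf, *end = buf + sizeof(buf);
	uint32_t len;

	while (decode_blob(&p, end, &len) == 0)
		printf("blob of %u bytes\n", len);
	return 0;
}
```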
diff --git a/fs/dcache.c b/fs/dcache.c
index c8c78ba07827..86d4db15473e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -896,7 +896,7 @@ EXPORT_SYMBOL(shrink_dcache_parent); | |||
896 | * | 896 | * |
897 | * In this case we return -1 to tell the caller that we baled. | 897 | * In this case we return -1 to tell the caller that we baled. |
898 | */ | 898 | */ |
899 | static int shrink_dcache_memory(int nr, gfp_t gfp_mask) | 899 | static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) |
900 | { | 900 | { |
901 | if (nr) { | 901 | if (nr) { |
902 | if (!(gfp_mask & __GFP_FS)) | 902 | if (!(gfp_mask & __GFP_FS)) |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 0609607d3955..d5be1693ac93 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -38,43 +38,18 @@ int nr_pdflush_threads; | |||
38 | /* | 38 | /* |
39 | * Passed into wb_writeback(), essentially a subset of writeback_control | 39 | * Passed into wb_writeback(), essentially a subset of writeback_control |
40 | */ | 40 | */ |
41 | struct wb_writeback_args { | 41 | struct wb_writeback_work { |
42 | long nr_pages; | 42 | long nr_pages; |
43 | struct super_block *sb; | 43 | struct super_block *sb; |
44 | enum writeback_sync_modes sync_mode; | 44 | enum writeback_sync_modes sync_mode; |
45 | unsigned int for_kupdate:1; | 45 | unsigned int for_kupdate:1; |
46 | unsigned int range_cyclic:1; | 46 | unsigned int range_cyclic:1; |
47 | unsigned int for_background:1; | 47 | unsigned int for_background:1; |
48 | }; | ||
49 | 48 | ||
50 | /* | ||
51 | * Work items for the bdi_writeback threads | ||
52 | */ | ||
53 | struct bdi_work { | ||
54 | struct list_head list; /* pending work list */ | 49 | struct list_head list; /* pending work list */ |
55 | struct rcu_head rcu_head; /* for RCU free/clear of work */ | 50 | struct completion *done; /* set if the caller waits */ |
56 | |||
57 | unsigned long seen; /* threads that have seen this work */ | ||
58 | atomic_t pending; /* number of threads still to do work */ | ||
59 | |||
60 | struct wb_writeback_args args; /* writeback arguments */ | ||
61 | |||
62 | unsigned long state; /* flag bits, see WS_* */ | ||
63 | }; | 51 | }; |
64 | 52 | ||
65 | enum { | ||
66 | WS_INPROGRESS = 0, | ||
67 | WS_ONSTACK, | ||
68 | }; | ||
69 | |||
70 | static inline void bdi_work_init(struct bdi_work *work, | ||
71 | struct wb_writeback_args *args) | ||
72 | { | ||
73 | INIT_RCU_HEAD(&work->rcu_head); | ||
74 | work->args = *args; | ||
75 | __set_bit(WS_INPROGRESS, &work->state); | ||
76 | } | ||
77 | |||
78 | /** | 53 | /** |
79 | * writeback_in_progress - determine whether there is writeback in progress | 54 | * writeback_in_progress - determine whether there is writeback in progress |
80 | * @bdi: the device's backing_dev_info structure. | 55 | * @bdi: the device's backing_dev_info structure. |
@@ -87,49 +62,11 @@ int writeback_in_progress(struct backing_dev_info *bdi) | |||
87 | return !list_empty(&bdi->work_list); | 62 | return !list_empty(&bdi->work_list); |
88 | } | 63 | } |
89 | 64 | ||
90 | static void bdi_work_free(struct rcu_head *head) | 65 | static void bdi_queue_work(struct backing_dev_info *bdi, |
91 | { | 66 | struct wb_writeback_work *work) |
92 | struct bdi_work *work = container_of(head, struct bdi_work, rcu_head); | ||
93 | |||
94 | clear_bit(WS_INPROGRESS, &work->state); | ||
95 | smp_mb__after_clear_bit(); | ||
96 | wake_up_bit(&work->state, WS_INPROGRESS); | ||
97 | |||
98 | if (!test_bit(WS_ONSTACK, &work->state)) | ||
99 | kfree(work); | ||
100 | } | ||
101 | |||
102 | static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work) | ||
103 | { | 67 | { |
104 | /* | ||
105 | * The caller has retrieved the work arguments from this work, | ||
106 | * drop our reference. If this is the last ref, delete and free it | ||
107 | */ | ||
108 | if (atomic_dec_and_test(&work->pending)) { | ||
109 | struct backing_dev_info *bdi = wb->bdi; | ||
110 | |||
111 | spin_lock(&bdi->wb_lock); | ||
112 | list_del_rcu(&work->list); | ||
113 | spin_unlock(&bdi->wb_lock); | ||
114 | |||
115 | call_rcu(&work->rcu_head, bdi_work_free); | ||
116 | } | ||
117 | } | ||
118 | |||
119 | static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work) | ||
120 | { | ||
121 | work->seen = bdi->wb_mask; | ||
122 | BUG_ON(!work->seen); | ||
123 | atomic_set(&work->pending, bdi->wb_cnt); | ||
124 | BUG_ON(!bdi->wb_cnt); | ||
125 | |||
126 | /* | ||
127 | * list_add_tail_rcu() contains the necessary barriers to | ||
128 | * make sure the above stores are seen before the item is | ||
129 | * noticed on the list | ||
130 | */ | ||
131 | spin_lock(&bdi->wb_lock); | 68 | spin_lock(&bdi->wb_lock); |
132 | list_add_tail_rcu(&work->list, &bdi->work_list); | 69 | list_add_tail(&work->list, &bdi->work_list); |
133 | spin_unlock(&bdi->wb_lock); | 70 | spin_unlock(&bdi->wb_lock); |
134 | 71 | ||
135 | /* | 72 | /* |
@@ -146,55 +83,29 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work) | |||
146 | } | 83 | } |
147 | } | 84 | } |
148 | 85 | ||
149 | /* | 86 | static void |
150 | * Used for on-stack allocated work items. The caller needs to wait until | 87 | __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, |
151 | * the wb threads have acked the work before it's safe to continue. | 88 | bool range_cyclic, bool for_background) |
152 | */ | ||
153 | static void bdi_wait_on_work_done(struct bdi_work *work) | ||
154 | { | 89 | { |
155 | wait_on_bit(&work->state, WS_INPROGRESS, bdi_sched_wait, | 90 | struct wb_writeback_work *work; |
156 | TASK_UNINTERRUPTIBLE); | ||
157 | } | ||
158 | |||
159 | static void bdi_alloc_queue_work(struct backing_dev_info *bdi, | ||
160 | struct wb_writeback_args *args) | ||
161 | { | ||
162 | struct bdi_work *work; | ||
163 | 91 | ||
164 | /* | 92 | /* |
165 | * This is WB_SYNC_NONE writeback, so if allocation fails just | 93 | * This is WB_SYNC_NONE writeback, so if allocation fails just |
166 | * wakeup the thread for old dirty data writeback | 94 | * wakeup the thread for old dirty data writeback |
167 | */ | 95 | */ |
168 | work = kmalloc(sizeof(*work), GFP_ATOMIC); | 96 | work = kzalloc(sizeof(*work), GFP_ATOMIC); |
169 | if (work) { | 97 | if (!work) { |
170 | bdi_work_init(work, args); | 98 | if (bdi->wb.task) |
171 | bdi_queue_work(bdi, work); | 99 | wake_up_process(bdi->wb.task); |
172 | } else { | 100 | return; |
173 | struct bdi_writeback *wb = &bdi->wb; | ||
174 | |||
175 | if (wb->task) | ||
176 | wake_up_process(wb->task); | ||
177 | } | 101 | } |
178 | } | ||
179 | 102 | ||
180 | /** | 103 | work->sync_mode = WB_SYNC_NONE; |
181 | * bdi_queue_work_onstack - start and wait for writeback | 104 | work->nr_pages = nr_pages; |
182 | * @sb: write inodes from this super_block | 105 | work->range_cyclic = range_cyclic; |
183 | * | 106 | work->for_background = for_background; |
184 | * Description: | ||
185 | * This function initiates writeback and waits for the operation to | ||
186 | * complete. Callers must hold the sb s_umount semaphore for | ||
187 | * reading, to avoid having the super disappear before we are done. | ||
188 | */ | ||
189 | static void bdi_queue_work_onstack(struct wb_writeback_args *args) | ||
190 | { | ||
191 | struct bdi_work work; | ||
192 | |||
193 | bdi_work_init(&work, args); | ||
194 | __set_bit(WS_ONSTACK, &work.state); | ||
195 | 107 | ||
196 | bdi_queue_work(args->sb->s_bdi, &work); | 108 | bdi_queue_work(bdi, work); |
197 | bdi_wait_on_work_done(&work); | ||
198 | } | 109 | } |
199 | 110 | ||
200 | /** | 111 | /** |
@@ -210,13 +121,7 @@ static void bdi_queue_work_onstack(struct wb_writeback_args *args) | |||
210 | */ | 121 | */ |
211 | void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) | 122 | void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) |
212 | { | 123 | { |
213 | struct wb_writeback_args args = { | 124 | __bdi_start_writeback(bdi, nr_pages, true, false); |
214 | .sync_mode = WB_SYNC_NONE, | ||
215 | .nr_pages = nr_pages, | ||
216 | .range_cyclic = 1, | ||
217 | }; | ||
218 | |||
219 | bdi_alloc_queue_work(bdi, &args); | ||
220 | } | 125 | } |
221 | 126 | ||
222 | /** | 127 | /** |
@@ -230,13 +135,7 @@ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) | |||
230 | */ | 135 | */ |
231 | void bdi_start_background_writeback(struct backing_dev_info *bdi) | 136 | void bdi_start_background_writeback(struct backing_dev_info *bdi) |
232 | { | 137 | { |
233 | struct wb_writeback_args args = { | 138 | __bdi_start_writeback(bdi, LONG_MAX, true, true); |
234 | .sync_mode = WB_SYNC_NONE, | ||
235 | .nr_pages = LONG_MAX, | ||
236 | .for_background = 1, | ||
237 | .range_cyclic = 1, | ||
238 | }; | ||
239 | bdi_alloc_queue_work(bdi, &args); | ||
240 | } | 139 | } |
241 | 140 | ||
242 | /* | 141 | /* |
@@ -554,29 +453,41 @@ static bool pin_sb_for_writeback(struct super_block *sb) | |||
554 | 453 | ||
555 | /* | 454 | /* |
556 | * Write a portion of b_io inodes which belong to @sb. | 455 | * Write a portion of b_io inodes which belong to @sb. |
557 | * If @wbc->sb != NULL, then find and write all such | 456 | * |
457 | * If @only_this_sb is true, then find and write all such | ||
558 | * inodes. Otherwise write only ones which go sequentially | 458 | * inodes. Otherwise write only ones which go sequentially |
559 | * in reverse order. | 459 | * in reverse order. |
460 | * | ||
560 | * Return 1, if the caller writeback routine should be | 461 | * Return 1, if the caller writeback routine should be |
561 | * interrupted. Otherwise return 0. | 462 | * interrupted. Otherwise return 0. |
562 | */ | 463 | */ |
563 | static int writeback_sb_inodes(struct super_block *sb, | 464 | static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, |
564 | struct bdi_writeback *wb, | 465 | struct writeback_control *wbc, bool only_this_sb) |
565 | struct writeback_control *wbc) | ||
566 | { | 466 | { |
567 | while (!list_empty(&wb->b_io)) { | 467 | while (!list_empty(&wb->b_io)) { |
568 | long pages_skipped; | 468 | long pages_skipped; |
569 | struct inode *inode = list_entry(wb->b_io.prev, | 469 | struct inode *inode = list_entry(wb->b_io.prev, |
570 | struct inode, i_list); | 470 | struct inode, i_list); |
571 | if (wbc->sb && sb != inode->i_sb) { | 471 | |
572 | /* super block given and doesn't | 472 | if (inode->i_sb != sb) { |
573 | match, skip this inode */ | 473 | if (only_this_sb) { |
574 | redirty_tail(inode); | 474 | /* |
575 | continue; | 475 | * We only want to write back data for this |
576 | } | 476 | * superblock, move all inodes not belonging |
577 | if (sb != inode->i_sb) | 477 | * to it back onto the dirty list. |
578 | /* finish with this superblock */ | 478 | */ |
479 | redirty_tail(inode); | ||
480 | continue; | ||
481 | } | ||
482 | |||
483 | /* | ||
484 | * The inode belongs to a different superblock. | ||
485 | * Bounce back to the caller to unpin this and | ||
486 | * pin the next superblock. | ||
487 | */ | ||
579 | return 0; | 488 | return 0; |
489 | } | ||
490 | |||
580 | if (inode->i_state & (I_NEW | I_WILL_FREE)) { | 491 | if (inode->i_state & (I_NEW | I_WILL_FREE)) { |
581 | requeue_io(inode); | 492 | requeue_io(inode); |
582 | continue; | 493 | continue; |
@@ -614,8 +525,8 @@ static int writeback_sb_inodes(struct super_block *sb, | |||
614 | return 1; | 525 | return 1; |
615 | } | 526 | } |
616 | 527 | ||
617 | static void writeback_inodes_wb(struct bdi_writeback *wb, | 528 | void writeback_inodes_wb(struct bdi_writeback *wb, |
618 | struct writeback_control *wbc) | 529 | struct writeback_control *wbc) |
619 | { | 530 | { |
620 | int ret = 0; | 531 | int ret = 0; |
621 | 532 | ||
@@ -629,29 +540,12 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, | |||
629 | struct inode, i_list); | 540 | struct inode, i_list); |
630 | struct super_block *sb = inode->i_sb; | 541 | struct super_block *sb = inode->i_sb; |
631 | 542 | ||
632 | if (wbc->sb) { | 543 | if (!pin_sb_for_writeback(sb)) { |
633 | /* | 544 | requeue_io(inode); |
634 | * We are requested to write out inodes for a specific | 545 | continue; |
635 | * superblock. This means we already have s_umount | ||
636 | * taken by the caller which also waits for us to | ||
637 | * complete the writeout. | ||
638 | */ | ||
639 | if (sb != wbc->sb) { | ||
640 | redirty_tail(inode); | ||
641 | continue; | ||
642 | } | ||
643 | |||
644 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | ||
645 | |||
646 | ret = writeback_sb_inodes(sb, wb, wbc); | ||
647 | } else { | ||
648 | if (!pin_sb_for_writeback(sb)) { | ||
649 | requeue_io(inode); | ||
650 | continue; | ||
651 | } | ||
652 | ret = writeback_sb_inodes(sb, wb, wbc); | ||
653 | drop_super(sb); | ||
654 | } | 546 | } |
547 | ret = writeback_sb_inodes(sb, wb, wbc, false); | ||
548 | drop_super(sb); | ||
655 | 549 | ||
656 | if (ret) | 550 | if (ret) |
657 | break; | 551 | break; |
@@ -660,11 +554,17 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, | |||
660 | /* Leave any unwritten inodes on b_io */ | 554 | /* Leave any unwritten inodes on b_io */ |
661 | } | 555 | } |
662 | 556 | ||
663 | void writeback_inodes_wbc(struct writeback_control *wbc) | 557 | static void __writeback_inodes_sb(struct super_block *sb, |
558 | struct bdi_writeback *wb, struct writeback_control *wbc) | ||
664 | { | 559 | { |
665 | struct backing_dev_info *bdi = wbc->bdi; | 560 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
666 | 561 | ||
667 | writeback_inodes_wb(&bdi->wb, wbc); | 562 | wbc->wb_start = jiffies; /* livelock avoidance */ |
563 | spin_lock(&inode_lock); | ||
564 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) | ||
565 | queue_io(wb, wbc->older_than_this); | ||
566 | writeback_sb_inodes(sb, wb, wbc, true); | ||
567 | spin_unlock(&inode_lock); | ||
668 | } | 568 | } |
669 | 569 | ||
670 | /* | 570 | /* |
@@ -702,16 +602,14 @@ static inline bool over_bground_thresh(void) | |||
702 | * all dirty pages if they are all attached to "old" mappings. | 602 | * all dirty pages if they are all attached to "old" mappings. |
703 | */ | 603 | */ |
704 | static long wb_writeback(struct bdi_writeback *wb, | 604 | static long wb_writeback(struct bdi_writeback *wb, |
705 | struct wb_writeback_args *args) | 605 | struct wb_writeback_work *work) |
706 | { | 606 | { |
707 | struct writeback_control wbc = { | 607 | struct writeback_control wbc = { |
708 | .bdi = wb->bdi, | 608 | .sync_mode = work->sync_mode, |
709 | .sb = args->sb, | ||
710 | .sync_mode = args->sync_mode, | ||
711 | .older_than_this = NULL, | 609 | .older_than_this = NULL, |
712 | .for_kupdate = args->for_kupdate, | 610 | .for_kupdate = work->for_kupdate, |
713 | .for_background = args->for_background, | 611 | .for_background = work->for_background, |
714 | .range_cyclic = args->range_cyclic, | 612 | .range_cyclic = work->range_cyclic, |
715 | }; | 613 | }; |
716 | unsigned long oldest_jif; | 614 | unsigned long oldest_jif; |
717 | long wrote = 0; | 615 | long wrote = 0; |
@@ -731,21 +629,24 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
731 | /* | 629 | /* |
732 | * Stop writeback when nr_pages has been consumed | 630 | * Stop writeback when nr_pages has been consumed |
733 | */ | 631 | */ |
734 | if (args->nr_pages <= 0) | 632 | if (work->nr_pages <= 0) |
735 | break; | 633 | break; |
736 | 634 | ||
737 | /* | 635 | /* |
738 | * For background writeout, stop when we are below the | 636 | * For background writeout, stop when we are below the |
739 | * background dirty threshold | 637 | * background dirty threshold |
740 | */ | 638 | */ |
741 | if (args->for_background && !over_bground_thresh()) | 639 | if (work->for_background && !over_bground_thresh()) |
742 | break; | 640 | break; |
743 | 641 | ||
744 | wbc.more_io = 0; | 642 | wbc.more_io = 0; |
745 | wbc.nr_to_write = MAX_WRITEBACK_PAGES; | 643 | wbc.nr_to_write = MAX_WRITEBACK_PAGES; |
746 | wbc.pages_skipped = 0; | 644 | wbc.pages_skipped = 0; |
747 | writeback_inodes_wb(wb, &wbc); | 645 | if (work->sb) |
748 | args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; | 646 | __writeback_inodes_sb(work->sb, wb, &wbc); |
647 | else | ||
648 | writeback_inodes_wb(wb, &wbc); | ||
649 | work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; | ||
749 | wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; | 650 | wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; |
750 | 651 | ||
751 | /* | 652 | /* |
@@ -781,31 +682,21 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
781 | } | 682 | } |
782 | 683 | ||
783 | /* | 684 | /* |
784 | * Return the next bdi_work struct that hasn't been processed by this | 685 | * Return the next wb_writeback_work struct that hasn't been processed yet. |
785 | * wb thread yet. ->seen is initially set for each thread that exists | ||
786 | * for this device, when a thread first notices a piece of work it | ||
787 | * clears its bit. Depending on writeback type, the thread will notify | ||
788 | * completion on either receiving the work (WB_SYNC_NONE) or after | ||
789 | * it is done (WB_SYNC_ALL). | ||
790 | */ | 686 | */ |
791 | static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi, | 687 | static struct wb_writeback_work * |
792 | struct bdi_writeback *wb) | 688 | get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb) |
793 | { | 689 | { |
794 | struct bdi_work *work, *ret = NULL; | 690 | struct wb_writeback_work *work = NULL; |
795 | |||
796 | rcu_read_lock(); | ||
797 | 691 | ||
798 | list_for_each_entry_rcu(work, &bdi->work_list, list) { | 692 | spin_lock(&bdi->wb_lock); |
799 | if (!test_bit(wb->nr, &work->seen)) | 693 | if (!list_empty(&bdi->work_list)) { |
800 | continue; | 694 | work = list_entry(bdi->work_list.next, |
801 | clear_bit(wb->nr, &work->seen); | 695 | struct wb_writeback_work, list); |
802 | 696 | list_del_init(&work->list); | |
803 | ret = work; | ||
804 | break; | ||
805 | } | 697 | } |
806 | 698 | spin_unlock(&bdi->wb_lock); | |
807 | rcu_read_unlock(); | 699 | return work; |
808 | return ret; | ||
809 | } | 700 | } |
810 | 701 | ||
811 | static long wb_check_old_data_flush(struct bdi_writeback *wb) | 702 | static long wb_check_old_data_flush(struct bdi_writeback *wb) |
@@ -830,14 +721,14 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) | |||
830 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | 721 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); |
831 | 722 | ||
832 | if (nr_pages) { | 723 | if (nr_pages) { |
833 | struct wb_writeback_args args = { | 724 | struct wb_writeback_work work = { |
834 | .nr_pages = nr_pages, | 725 | .nr_pages = nr_pages, |
835 | .sync_mode = WB_SYNC_NONE, | 726 | .sync_mode = WB_SYNC_NONE, |
836 | .for_kupdate = 1, | 727 | .for_kupdate = 1, |
837 | .range_cyclic = 1, | 728 | .range_cyclic = 1, |
838 | }; | 729 | }; |
839 | 730 | ||
840 | return wb_writeback(wb, &args); | 731 | return wb_writeback(wb, &work); |
841 | } | 732 | } |
842 | 733 | ||
843 | return 0; | 734 | return 0; |
@@ -849,33 +740,27 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) | |||
849 | long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | 740 | long wb_do_writeback(struct bdi_writeback *wb, int force_wait) |
850 | { | 741 | { |
851 | struct backing_dev_info *bdi = wb->bdi; | 742 | struct backing_dev_info *bdi = wb->bdi; |
852 | struct bdi_work *work; | 743 | struct wb_writeback_work *work; |
853 | long wrote = 0; | 744 | long wrote = 0; |
854 | 745 | ||
855 | while ((work = get_next_work_item(bdi, wb)) != NULL) { | 746 | while ((work = get_next_work_item(bdi, wb)) != NULL) { |
856 | struct wb_writeback_args args = work->args; | ||
857 | |||
858 | /* | 747 | /* |
859 | * Override sync mode, in case we must wait for completion | 748 | * Override sync mode, in case we must wait for completion |
749 | * because this thread is exiting now. | ||
860 | */ | 750 | */ |
861 | if (force_wait) | 751 | if (force_wait) |
862 | work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; | 752 | work->sync_mode = WB_SYNC_ALL; |
863 | |||
864 | /* | ||
865 | * If this isn't a data integrity operation, just notify | ||
866 | * that we have seen this work and we are now starting it. | ||
867 | */ | ||
868 | if (!test_bit(WS_ONSTACK, &work->state)) | ||
869 | wb_clear_pending(wb, work); | ||
870 | 753 | ||
871 | wrote += wb_writeback(wb, &args); | 754 | wrote += wb_writeback(wb, work); |
872 | 755 | ||
873 | /* | 756 | /* |
874 | * This is a data integrity writeback, so only do the | 757 | * Notify the caller of completion if this is a synchronous |
875 | * notification when we have completed the work. | 758 | * work item, otherwise just free it. |
876 | */ | 759 | */ |
877 | if (test_bit(WS_ONSTACK, &work->state)) | 760 | if (work->done) |
878 | wb_clear_pending(wb, work); | 761 | complete(work->done); |
762 | else | ||
763 | kfree(work); | ||
879 | } | 764 | } |
880 | 765 | ||
881 | /* | 766 | /* |
@@ -938,14 +823,9 @@ int bdi_writeback_task(struct bdi_writeback *wb) | |||
938 | void wakeup_flusher_threads(long nr_pages) | 823 | void wakeup_flusher_threads(long nr_pages) |
939 | { | 824 | { |
940 | struct backing_dev_info *bdi; | 825 | struct backing_dev_info *bdi; |
941 | struct wb_writeback_args args = { | ||
942 | .sync_mode = WB_SYNC_NONE, | ||
943 | }; | ||
944 | 826 | ||
945 | if (nr_pages) { | 827 | if (!nr_pages) { |
946 | args.nr_pages = nr_pages; | 828 | nr_pages = global_page_state(NR_FILE_DIRTY) + |
947 | } else { | ||
948 | args.nr_pages = global_page_state(NR_FILE_DIRTY) + | ||
949 | global_page_state(NR_UNSTABLE_NFS); | 829 | global_page_state(NR_UNSTABLE_NFS); |
950 | } | 830 | } |
951 | 831 | ||
@@ -953,7 +833,7 @@ void wakeup_flusher_threads(long nr_pages) | |||
953 | list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { | 833 | list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { |
954 | if (!bdi_has_dirty_io(bdi)) | 834 | if (!bdi_has_dirty_io(bdi)) |
955 | continue; | 835 | continue; |
956 | bdi_alloc_queue_work(bdi, &args); | 836 | __bdi_start_writeback(bdi, nr_pages, false, false); |
957 | } | 837 | } |
958 | rcu_read_unlock(); | 838 | rcu_read_unlock(); |
959 | } | 839 | } |
@@ -1162,17 +1042,20 @@ void writeback_inodes_sb(struct super_block *sb) | |||
1162 | { | 1042 | { |
1163 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); | 1043 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); |
1164 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); | 1044 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); |
1165 | struct wb_writeback_args args = { | 1045 | DECLARE_COMPLETION_ONSTACK(done); |
1046 | struct wb_writeback_work work = { | ||
1166 | .sb = sb, | 1047 | .sb = sb, |
1167 | .sync_mode = WB_SYNC_NONE, | 1048 | .sync_mode = WB_SYNC_NONE, |
1049 | .done = &done, | ||
1168 | }; | 1050 | }; |
1169 | 1051 | ||
1170 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 1052 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
1171 | 1053 | ||
1172 | args.nr_pages = nr_dirty + nr_unstable + | 1054 | work.nr_pages = nr_dirty + nr_unstable + |
1173 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | 1055 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); |
1174 | 1056 | ||
1175 | bdi_queue_work_onstack(&args); | 1057 | bdi_queue_work(sb->s_bdi, &work); |
1058 | wait_for_completion(&done); | ||
1176 | } | 1059 | } |
1177 | EXPORT_SYMBOL(writeback_inodes_sb); | 1060 | EXPORT_SYMBOL(writeback_inodes_sb); |
1178 | 1061 | ||
@@ -1204,16 +1087,20 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle); | |||
1204 | */ | 1087 | */ |
1205 | void sync_inodes_sb(struct super_block *sb) | 1088 | void sync_inodes_sb(struct super_block *sb) |
1206 | { | 1089 | { |
1207 | struct wb_writeback_args args = { | 1090 | DECLARE_COMPLETION_ONSTACK(done); |
1091 | struct wb_writeback_work work = { | ||
1208 | .sb = sb, | 1092 | .sb = sb, |
1209 | .sync_mode = WB_SYNC_ALL, | 1093 | .sync_mode = WB_SYNC_ALL, |
1210 | .nr_pages = LONG_MAX, | 1094 | .nr_pages = LONG_MAX, |
1211 | .range_cyclic = 0, | 1095 | .range_cyclic = 0, |
1096 | .done = &done, | ||
1212 | }; | 1097 | }; |
1213 | 1098 | ||
1214 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 1099 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
1215 | 1100 | ||
1216 | bdi_queue_work_onstack(&args); | 1101 | bdi_queue_work(sb->s_bdi, &work); |
1102 | wait_for_completion(&done); | ||
1103 | |||
1217 | wait_sb_inodes(sb); | 1104 | wait_sb_inodes(sb); |
1218 | } | 1105 | } |
1219 | EXPORT_SYMBOL(sync_inodes_sb); | 1106 | EXPORT_SYMBOL(sync_inodes_sb); |
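The fs-writeback.c hunks above replace the old bdi_work/bitmask machinery with a plain list of wb_writeback_work items under bdi->wb_lock: a synchronous caller keeps the work on its stack and waits on a completion, while wb_do_writeback() kfree()s any item whose ->done pointer is NULL, which implies asynchronous callers heap-allocate theirs. The following is only an illustrative sketch of those two calling patterns, using the field names and helpers visible in the hunks; bdi_queue_work() and __bdi_start_writeback() bodies are not shown in this section, so the asynchronous side is an assumption.

/* Synchronous pattern, as writeback_inodes_sb()/sync_inodes_sb() now do:
 * on-stack work item plus an on-stack completion that the flusher fires. */
static void example_writeback_and_wait(struct super_block *sb, long nr_pages)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct wb_writeback_work work = {
		.sb        = sb,
		.sync_mode = WB_SYNC_NONE,
		.nr_pages  = nr_pages,
		.done      = &done,          /* flusher calls complete() on this */
	};

	bdi_queue_work(sb->s_bdi, &work);
	wait_for_completion(&done);          /* work stays valid until completed */
}

/* Assumed asynchronous pattern: heap-allocated work with no ->done, which
 * wb_do_writeback() frees with kfree() once it has been processed. */
static void example_writeback_nowait(struct backing_dev_info *bdi, long nr_pages)
{
	struct wb_writeback_work *work;

	work = kzalloc(sizeof(*work), GFP_ATOMIC);
	if (!work)
		return;

	work->sync_mode    = WB_SYNC_NONE;
	work->nr_pages     = nr_pages;
	work->range_cyclic = 1;
	bdi_queue_work(bdi, work);           /* no waiting; flusher kfree()s it */
}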
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 4a48c0f4b402..84da64b551b2 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
@@ -1041,6 +1041,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size) | |||
1041 | 1041 | ||
1042 | if (gfs2_is_stuffed(ip)) { | 1042 | if (gfs2_is_stuffed(ip)) { |
1043 | u64 dsize = size + sizeof(struct gfs2_inode); | 1043 | u64 dsize = size + sizeof(struct gfs2_inode); |
1044 | ip->i_disksize = size; | ||
1044 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | 1045 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; |
1045 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 1046 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
1046 | gfs2_dinode_out(ip, dibh->b_data); | 1047 | gfs2_dinode_out(ip, dibh->b_data); |
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 8295c5b5d4a9..26ca3361a8bc 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c | |||
@@ -392,7 +392,7 @@ static int gfs2_dirent_find_space(const struct gfs2_dirent *dent, | |||
392 | unsigned totlen = be16_to_cpu(dent->de_rec_len); | 392 | unsigned totlen = be16_to_cpu(dent->de_rec_len); |
393 | 393 | ||
394 | if (gfs2_dirent_sentinel(dent)) | 394 | if (gfs2_dirent_sentinel(dent)) |
395 | actual = GFS2_DIRENT_SIZE(0); | 395 | actual = 0; |
396 | if (totlen - actual >= required) | 396 | if (totlen - actual >= required) |
397 | return 1; | 397 | return 1; |
398 | return 0; | 398 | return 0; |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index ddcdbf493536..0898f3ec8212 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -706,8 +706,18 @@ static void glock_work_func(struct work_struct *work) | |||
706 | { | 706 | { |
707 | unsigned long delay = 0; | 707 | unsigned long delay = 0; |
708 | struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); | 708 | struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); |
709 | struct gfs2_holder *gh; | ||
709 | int drop_ref = 0; | 710 | int drop_ref = 0; |
710 | 711 | ||
712 | if (unlikely(test_bit(GLF_FROZEN, &gl->gl_flags))) { | ||
713 | spin_lock(&gl->gl_spin); | ||
714 | gh = find_first_waiter(gl); | ||
715 | if (gh && (gh->gh_flags & LM_FLAG_NOEXP) && | ||
716 | test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) | ||
717 | set_bit(GLF_REPLY_PENDING, &gl->gl_flags); | ||
718 | spin_unlock(&gl->gl_spin); | ||
719 | } | ||
720 | |||
711 | if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) { | 721 | if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) { |
712 | finish_xmote(gl, gl->gl_reply); | 722 | finish_xmote(gl, gl->gl_reply); |
713 | drop_ref = 1; | 723 | drop_ref = 1; |
@@ -1348,7 +1358,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) | |||
1348 | } | 1358 | } |
1349 | 1359 | ||
1350 | 1360 | ||
1351 | static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask) | 1361 | static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) |
1352 | { | 1362 | { |
1353 | struct gfs2_glock *gl; | 1363 | struct gfs2_glock *gl; |
1354 | int may_demote; | 1364 | int may_demote; |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index b5612cbb62a5..f03afd9c44bc 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -169,7 +169,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, | |||
169 | { | 169 | { |
170 | struct inode *inode; | 170 | struct inode *inode; |
171 | struct gfs2_inode *ip; | 171 | struct gfs2_inode *ip; |
172 | struct gfs2_glock *io_gl; | 172 | struct gfs2_glock *io_gl = NULL; |
173 | int error; | 173 | int error; |
174 | 174 | ||
175 | inode = gfs2_iget(sb, no_addr); | 175 | inode = gfs2_iget(sb, no_addr); |
@@ -198,6 +198,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, | |||
198 | ip->i_iopen_gh.gh_gl->gl_object = ip; | 198 | ip->i_iopen_gh.gh_gl->gl_object = ip; |
199 | 199 | ||
200 | gfs2_glock_put(io_gl); | 200 | gfs2_glock_put(io_gl); |
201 | io_gl = NULL; | ||
201 | 202 | ||
202 | if ((type == DT_UNKNOWN) && (no_formal_ino == 0)) | 203 | if ((type == DT_UNKNOWN) && (no_formal_ino == 0)) |
203 | goto gfs2_nfsbypass; | 204 | goto gfs2_nfsbypass; |
@@ -228,7 +229,8 @@ gfs2_nfsbypass: | |||
228 | fail_glock: | 229 | fail_glock: |
229 | gfs2_glock_dq(&ip->i_iopen_gh); | 230 | gfs2_glock_dq(&ip->i_iopen_gh); |
230 | fail_iopen: | 231 | fail_iopen: |
231 | gfs2_glock_put(io_gl); | 232 | if (io_gl) |
233 | gfs2_glock_put(io_gl); | ||
232 | fail_put: | 234 | fail_put: |
233 | if (inode->i_state & I_NEW) | 235 | if (inode->i_state & I_NEW) |
234 | ip->i_gl->gl_object = NULL; | 236 | ip->i_gl->gl_object = NULL; |
@@ -256,7 +258,7 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr) | |||
256 | { | 258 | { |
257 | struct gfs2_sbd *sdp; | 259 | struct gfs2_sbd *sdp; |
258 | struct gfs2_inode *ip; | 260 | struct gfs2_inode *ip; |
259 | struct gfs2_glock *io_gl; | 261 | struct gfs2_glock *io_gl = NULL; |
260 | int error; | 262 | int error; |
261 | struct gfs2_holder gh; | 263 | struct gfs2_holder gh; |
262 | struct inode *inode; | 264 | struct inode *inode; |
@@ -293,6 +295,7 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr) | |||
293 | 295 | ||
294 | ip->i_iopen_gh.gh_gl->gl_object = ip; | 296 | ip->i_iopen_gh.gh_gl->gl_object = ip; |
295 | gfs2_glock_put(io_gl); | 297 | gfs2_glock_put(io_gl); |
298 | io_gl = NULL; | ||
296 | 299 | ||
297 | inode->i_mode = DT2IF(DT_UNKNOWN); | 300 | inode->i_mode = DT2IF(DT_UNKNOWN); |
298 | 301 | ||
@@ -319,7 +322,8 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr) | |||
319 | fail_glock: | 322 | fail_glock: |
320 | gfs2_glock_dq(&ip->i_iopen_gh); | 323 | gfs2_glock_dq(&ip->i_iopen_gh); |
321 | fail_iopen: | 324 | fail_iopen: |
322 | gfs2_glock_put(io_gl); | 325 | if (io_gl) |
326 | gfs2_glock_put(io_gl); | ||
323 | fail_put: | 327 | fail_put: |
324 | ip->i_gl->gl_object = NULL; | 328 | ip->i_gl->gl_object = NULL; |
325 | gfs2_glock_put(ip->i_gl); | 329 | gfs2_glock_put(ip->i_gl); |
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 49667d68769e..8f02d3db8f42 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
@@ -77,7 +77,7 @@ static LIST_HEAD(qd_lru_list); | |||
77 | static atomic_t qd_lru_count = ATOMIC_INIT(0); | 77 | static atomic_t qd_lru_count = ATOMIC_INIT(0); |
78 | static DEFINE_SPINLOCK(qd_lru_lock); | 78 | static DEFINE_SPINLOCK(qd_lru_lock); |
79 | 79 | ||
80 | int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask) | 80 | int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) |
81 | { | 81 | { |
82 | struct gfs2_quota_data *qd; | 82 | struct gfs2_quota_data *qd; |
83 | struct gfs2_sbd *sdp; | 83 | struct gfs2_sbd *sdp; |
@@ -694,10 +694,8 @@ get_a_page: | |||
694 | if (!buffer_mapped(bh)) | 694 | if (!buffer_mapped(bh)) |
695 | goto unlock_out; | 695 | goto unlock_out; |
696 | /* If it's a newly allocated disk block for quota, zero it */ | 696 | /* If it's a newly allocated disk block for quota, zero it */ |
697 | if (buffer_new(bh)) { | 697 | if (buffer_new(bh)) |
698 | memset(bh->b_data, 0, bh->b_size); | 698 | zero_user(page, pos - blocksize, bh->b_size); |
699 | set_buffer_uptodate(bh); | ||
700 | } | ||
701 | } | 699 | } |
702 | 700 | ||
703 | if (PageUptodate(page)) | 701 | if (PageUptodate(page)) |
@@ -723,7 +721,7 @@ get_a_page: | |||
723 | 721 | ||
724 | /* If quota straddles page boundary, we need to update the rest of the | 722 | /* If quota straddles page boundary, we need to update the rest of the |
725 | * quota at the beginning of the next page */ | 723 | * quota at the beginning of the next page */ |
726 | if (offset != 0) { /* first page, offset is closer to PAGE_CACHE_SIZE */ | 724 | if ((offset + sizeof(struct gfs2_quota)) > PAGE_CACHE_SIZE) { |
727 | ptr = ptr + nbytes; | 725 | ptr = ptr + nbytes; |
728 | nbytes = sizeof(struct gfs2_quota) - nbytes; | 726 | nbytes = sizeof(struct gfs2_quota) - nbytes; |
729 | offset = 0; | 727 | offset = 0; |
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 195f60c8bd14..e7d236ca48bd 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h | |||
@@ -51,7 +51,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) | |||
51 | return ret; | 51 | return ret; |
52 | } | 52 | } |
53 | 53 | ||
54 | extern int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask); | 54 | extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask); |
55 | extern const struct quotactl_ops gfs2_quotactl_ops; | 55 | extern const struct quotactl_ops gfs2_quotactl_ops; |
56 | 56 | ||
57 | #endif /* __QUOTA_DOT_H__ */ | 57 | #endif /* __QUOTA_DOT_H__ */ |
diff --git a/fs/inode.c b/fs/inode.c index 2bee20ae3d65..722860b323a9 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -512,7 +512,7 @@ static void prune_icache(int nr_to_scan) | |||
512 | * This function is passed the number of inodes to scan, and it returns the | 512 | * This function is passed the number of inodes to scan, and it returns the |
513 | * total number of remaining possibly-reclaimable inodes. | 513 | * total number of remaining possibly-reclaimable inodes. |
514 | */ | 514 | */ |
515 | static int shrink_icache_memory(int nr, gfp_t gfp_mask) | 515 | static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) |
516 | { | 516 | { |
517 | if (nr) { | 517 | if (nr) { |
518 | /* | 518 | /* |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index bc2ff5932769..036880895bfc 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -297,7 +297,6 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction, | |||
297 | struct page *new_page; | 297 | struct page *new_page; |
298 | unsigned int new_offset; | 298 | unsigned int new_offset; |
299 | struct buffer_head *bh_in = jh2bh(jh_in); | 299 | struct buffer_head *bh_in = jh2bh(jh_in); |
300 | struct jbd2_buffer_trigger_type *triggers; | ||
301 | journal_t *journal = transaction->t_journal; | 300 | journal_t *journal = transaction->t_journal; |
302 | 301 | ||
303 | /* | 302 | /* |
@@ -328,21 +327,21 @@ repeat: | |||
328 | done_copy_out = 1; | 327 | done_copy_out = 1; |
329 | new_page = virt_to_page(jh_in->b_frozen_data); | 328 | new_page = virt_to_page(jh_in->b_frozen_data); |
330 | new_offset = offset_in_page(jh_in->b_frozen_data); | 329 | new_offset = offset_in_page(jh_in->b_frozen_data); |
331 | triggers = jh_in->b_frozen_triggers; | ||
332 | } else { | 330 | } else { |
333 | new_page = jh2bh(jh_in)->b_page; | 331 | new_page = jh2bh(jh_in)->b_page; |
334 | new_offset = offset_in_page(jh2bh(jh_in)->b_data); | 332 | new_offset = offset_in_page(jh2bh(jh_in)->b_data); |
335 | triggers = jh_in->b_triggers; | ||
336 | } | 333 | } |
337 | 334 | ||
338 | mapped_data = kmap_atomic(new_page, KM_USER0); | 335 | mapped_data = kmap_atomic(new_page, KM_USER0); |
339 | /* | 336 | /* |
340 | * Fire any commit trigger. Do this before checking for escaping, | 337 | * Fire data frozen trigger if the data wasn't already frozen. Do this |
341 | * as the trigger may modify the magic offset. If a copy-out | 338 | * before checking for escaping, as the trigger may modify the magic |
342 | * happens afterwards, it will have the correct data in the buffer. | 339 | * offset. If a copy-out happens afterwards, it will have the correct |
340 | * data in the buffer. | ||
343 | */ | 341 | */ |
344 | jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset, | 342 | if (!done_copy_out) |
345 | triggers); | 343 | jbd2_buffer_frozen_trigger(jh_in, mapped_data + new_offset, |
344 | jh_in->b_triggers); | ||
346 | 345 | ||
347 | /* | 346 | /* |
348 | * Check for escaping | 347 | * Check for escaping |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index e214d68620ac..b8e0806681bb 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -725,6 +725,9 @@ done: | |||
725 | page = jh2bh(jh)->b_page; | 725 | page = jh2bh(jh)->b_page; |
726 | offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK; | 726 | offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK; |
727 | source = kmap_atomic(page, KM_USER0); | 727 | source = kmap_atomic(page, KM_USER0); |
728 | /* Fire data frozen trigger just before we copy the data */ | ||
729 | jbd2_buffer_frozen_trigger(jh, source + offset, | ||
730 | jh->b_triggers); | ||
728 | memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); | 731 | memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); |
729 | kunmap_atomic(source, KM_USER0); | 732 | kunmap_atomic(source, KM_USER0); |
730 | 733 | ||
@@ -963,15 +966,15 @@ void jbd2_journal_set_triggers(struct buffer_head *bh, | |||
963 | jh->b_triggers = type; | 966 | jh->b_triggers = type; |
964 | } | 967 | } |
965 | 968 | ||
966 | void jbd2_buffer_commit_trigger(struct journal_head *jh, void *mapped_data, | 969 | void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data, |
967 | struct jbd2_buffer_trigger_type *triggers) | 970 | struct jbd2_buffer_trigger_type *triggers) |
968 | { | 971 | { |
969 | struct buffer_head *bh = jh2bh(jh); | 972 | struct buffer_head *bh = jh2bh(jh); |
970 | 973 | ||
971 | if (!triggers || !triggers->t_commit) | 974 | if (!triggers || !triggers->t_frozen) |
972 | return; | 975 | return; |
973 | 976 | ||
974 | triggers->t_commit(triggers, bh, mapped_data, bh->b_size); | 977 | triggers->t_frozen(triggers, bh, mapped_data, bh->b_size); |
975 | } | 978 | } |
976 | 979 | ||
977 | void jbd2_buffer_abort_trigger(struct journal_head *jh, | 980 | void jbd2_buffer_abort_trigger(struct journal_head *jh, |
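The jbd2 hunks rename the commit-time buffer trigger to a frozen-data trigger and fire it both when the data is frozen in the transaction code and before the frozen copy is written out in jbd2_journal_write_metadata_buffer(). A minimal sketch of how a client filesystem could hook it follows; the t_frozen prototype is inferred from the call triggers->t_frozen(triggers, bh, mapped_data, bh->b_size) in the hunks, and the myfs_* names and the commented t_abort member are assumptions rather than part of this patch.

/* Sketch: fix up a metadata checksum in the frozen image just before it is
 * written to the journal. Prototype inferred from the call sites above. */
static void myfs_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
				struct buffer_head *bh, void *data, size_t size)
{
	/* 'data' is the buffer image that will hit the journal; recompute any
	 * checksums or generation numbers in it here. */
}

static struct jbd2_buffer_trigger_type myfs_triggers = {
	.t_frozen = myfs_frozen_trigger,
	/* .t_abort = ...,  (abort callback assumed, not shown in these hunks) */
};

/* Attach the triggers to a metadata buffer before it is dirtied in a handle. */
static inline void myfs_attach_triggers(struct buffer_head *bh)
{
	jbd2_journal_set_triggers(bh, &myfs_triggers);
}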
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index a2d58c96f1b4..d258e261bdc7 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c | |||
@@ -626,7 +626,7 @@ void jffs2_xattr_free_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *i | |||
626 | 626 | ||
627 | static int check_xattr_ref_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic) | 627 | static int check_xattr_ref_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic) |
628 | { | 628 | { |
629 | /* success of check_xattr_ref_inode() means taht inode (ic) dose not have | 629 | /* success of check_xattr_ref_inode() means that inode (ic) does not have |
630 | * duplicate name/value pairs. If duplicate name/value pair would be found, | 630 | * duplicate name/value pairs. If duplicate name/value pair would be found, |
631 | * one will be removed. | 631 | * one will be removed. |
632 | */ | 632 | */ |
diff --git a/fs/mbcache.c b/fs/mbcache.c index ec88ff3d04a9..e28f21b95344 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c | |||
@@ -115,7 +115,7 @@ mb_cache_indexes(struct mb_cache *cache) | |||
115 | * What the mbcache registers as to get shrunk dynamically. | 115 | * What the mbcache registers as to get shrunk dynamically. |
116 | */ | 116 | */ |
117 | 117 | ||
118 | static int mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask); | 118 | static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask); |
119 | 119 | ||
120 | static struct shrinker mb_cache_shrinker = { | 120 | static struct shrinker mb_cache_shrinker = { |
121 | .shrink = mb_cache_shrink_fn, | 121 | .shrink = mb_cache_shrink_fn, |
@@ -191,13 +191,14 @@ forget: | |||
191 | * This function is called by the kernel memory management when memory | 191 | * This function is called by the kernel memory management when memory |
192 | * gets low. | 192 | * gets low. |
193 | * | 193 | * |
194 | * @shrink: (ignored) | ||
194 | * @nr_to_scan: Number of objects to scan | 195 | * @nr_to_scan: Number of objects to scan |
195 | * @gfp_mask: (ignored) | 196 | * @gfp_mask: (ignored) |
196 | * | 197 | * |
197 | * Returns the number of objects which are present in the cache. | 198 | * Returns the number of objects which are present in the cache. |
198 | */ | 199 | */ |
199 | static int | 200 | static int |
200 | mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask) | 201 | mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) |
201 | { | 202 | { |
202 | LIST_HEAD(free_list); | 203 | LIST_HEAD(free_list); |
203 | struct list_head *l, *ltmp; | 204 | struct list_head *l, *ltmp; |
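Several hunks in this commit (mbcache, the inode cache, GFS2 glocks and quota, the NFS access cache) adjust cache shrink callbacks to the new prototype that passes the struct shrinker itself as the first argument. Below is a hedged sketch of a shrinker written against that prototype; the myfs_* names and the .seeks field are assumed rather than taken from this diff.

static int myfs_cache_shrink(struct shrinker *shrink, int nr_to_scan,
			     gfp_t gfp_mask)
{
	if (nr_to_scan) {
		/* try to free up to nr_to_scan cached objects here,
		 * honouring gfp_mask when that matters */
	}
	/* report how many reclaimable objects remain in the cache */
	return 0;
}

static struct shrinker myfs_shrinker = {
	.shrink = myfs_cache_shrink,
	.seeks  = DEFAULT_SEEKS,	/* assumed default, not shown above */
};

/* register_shrinker(&myfs_shrinker) at module init,
 * unregister_shrinker(&myfs_shrinker) at module exit. */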
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 782b431ef91c..e60416d3f818 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -1710,7 +1710,7 @@ static void nfs_access_free_list(struct list_head *head) | |||
1710 | } | 1710 | } |
1711 | } | 1711 | } |
1712 | 1712 | ||
1713 | int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) | 1713 | int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) |
1714 | { | 1714 | { |
1715 | LIST_HEAD(head); | 1715 | LIST_HEAD(head); |
1716 | struct nfs_inode *nfsi; | 1716 | struct nfs_inode *nfsi; |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index d8bd619e386c..e70f44b9b3f4 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -205,7 +205,8 @@ extern struct rpc_procinfo nfs4_procedures[]; | |||
205 | void nfs_close_context(struct nfs_open_context *ctx, int is_sync); | 205 | void nfs_close_context(struct nfs_open_context *ctx, int is_sync); |
206 | 206 | ||
207 | /* dir.c */ | 207 | /* dir.c */ |
208 | extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask); | 208 | extern int nfs_access_cache_shrinker(struct shrinker *shrink, |
209 | int nr_to_scan, gfp_t gfp_mask); | ||
209 | 210 | ||
210 | /* inode.c */ | 211 | /* inode.c */ |
211 | extern struct workqueue_struct *nfsiod_workqueue; | 212 | extern struct workqueue_struct *nfsiod_workqueue; |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 3623ca20cc18..356e976772bf 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -196,15 +196,14 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock, | |||
196 | dump_stack(); | 196 | dump_stack(); |
197 | goto bail; | 197 | goto bail; |
198 | } | 198 | } |
199 | |||
200 | past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); | ||
201 | mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino, | ||
202 | (unsigned long long)past_eof); | ||
203 | |||
204 | if (create && (iblock >= past_eof)) | ||
205 | set_buffer_new(bh_result); | ||
206 | } | 199 | } |
207 | 200 | ||
201 | past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); | ||
202 | mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino, | ||
203 | (unsigned long long)past_eof); | ||
204 | if (create && (iblock >= past_eof)) | ||
205 | set_buffer_new(bh_result); | ||
206 | |||
208 | bail: | 207 | bail: |
209 | if (err < 0) | 208 | if (err < 0) |
210 | err = -EIO; | 209 | err = -EIO; |
@@ -459,36 +458,6 @@ int walk_page_buffers( handle_t *handle, | |||
459 | return ret; | 458 | return ret; |
460 | } | 459 | } |
461 | 460 | ||
462 | handle_t *ocfs2_start_walk_page_trans(struct inode *inode, | ||
463 | struct page *page, | ||
464 | unsigned from, | ||
465 | unsigned to) | ||
466 | { | ||
467 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
468 | handle_t *handle; | ||
469 | int ret = 0; | ||
470 | |||
471 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | ||
472 | if (IS_ERR(handle)) { | ||
473 | ret = -ENOMEM; | ||
474 | mlog_errno(ret); | ||
475 | goto out; | ||
476 | } | ||
477 | |||
478 | if (ocfs2_should_order_data(inode)) { | ||
479 | ret = ocfs2_jbd2_file_inode(handle, inode); | ||
480 | if (ret < 0) | ||
481 | mlog_errno(ret); | ||
482 | } | ||
483 | out: | ||
484 | if (ret) { | ||
485 | if (!IS_ERR(handle)) | ||
486 | ocfs2_commit_trans(osb, handle); | ||
487 | handle = ERR_PTR(ret); | ||
488 | } | ||
489 | return handle; | ||
490 | } | ||
491 | |||
492 | static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) | 461 | static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) |
493 | { | 462 | { |
494 | sector_t status; | 463 | sector_t status; |
@@ -1131,23 +1100,37 @@ out: | |||
1131 | */ | 1100 | */ |
1132 | static int ocfs2_grab_pages_for_write(struct address_space *mapping, | 1101 | static int ocfs2_grab_pages_for_write(struct address_space *mapping, |
1133 | struct ocfs2_write_ctxt *wc, | 1102 | struct ocfs2_write_ctxt *wc, |
1134 | u32 cpos, loff_t user_pos, int new, | 1103 | u32 cpos, loff_t user_pos, |
1104 | unsigned user_len, int new, | ||
1135 | struct page *mmap_page) | 1105 | struct page *mmap_page) |
1136 | { | 1106 | { |
1137 | int ret = 0, i; | 1107 | int ret = 0, i; |
1138 | unsigned long start, target_index, index; | 1108 | unsigned long start, target_index, end_index, index; |
1139 | struct inode *inode = mapping->host; | 1109 | struct inode *inode = mapping->host; |
1110 | loff_t last_byte; | ||
1140 | 1111 | ||
1141 | target_index = user_pos >> PAGE_CACHE_SHIFT; | 1112 | target_index = user_pos >> PAGE_CACHE_SHIFT; |
1142 | 1113 | ||
1143 | /* | 1114 | /* |
1144 | * Figure out how many pages we'll be manipulating here. For | 1115 | * Figure out how many pages we'll be manipulating here. For |
1145 | * non allocating write, we just change the one | 1116 | * non allocating write, we just change the one |
1146 | * page. Otherwise, we'll need a whole clusters worth. | 1117 | * page. Otherwise, we'll need a whole clusters worth. If we're |
1118 | * writing past i_size, we only need enough pages to cover the | ||
1119 | * last page of the write. | ||
1147 | */ | 1120 | */ |
1148 | if (new) { | 1121 | if (new) { |
1149 | wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb); | 1122 | wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb); |
1150 | start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos); | 1123 | start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos); |
1124 | /* | ||
1125 | * We need the index *past* the last page we could possibly | ||
1126 | * touch. This is the page past the end of the write or | ||
1127 | * i_size, whichever is greater. | ||
1128 | */ | ||
1129 | last_byte = max(user_pos + user_len, i_size_read(inode)); | ||
1130 | BUG_ON(last_byte < 1); | ||
1131 | end_index = ((last_byte - 1) >> PAGE_CACHE_SHIFT) + 1; | ||
1132 | if ((start + wc->w_num_pages) > end_index) | ||
1133 | wc->w_num_pages = end_index - start; | ||
1151 | } else { | 1134 | } else { |
1152 | wc->w_num_pages = 1; | 1135 | wc->w_num_pages = 1; |
1153 | start = target_index; | 1136 | start = target_index; |
@@ -1620,21 +1603,20 @@ out: | |||
1620 | * write path can treat it as an non-allocating write, which has no | 1603 | * write path can treat it as an non-allocating write, which has no |
1621 | * special case code for sparse/nonsparse files. | 1604 | * special case code for sparse/nonsparse files. |
1622 | */ | 1605 | */ |
1623 | static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos, | 1606 | static int ocfs2_expand_nonsparse_inode(struct inode *inode, |
1624 | unsigned len, | 1607 | struct buffer_head *di_bh, |
1608 | loff_t pos, unsigned len, | ||
1625 | struct ocfs2_write_ctxt *wc) | 1609 | struct ocfs2_write_ctxt *wc) |
1626 | { | 1610 | { |
1627 | int ret; | 1611 | int ret; |
1628 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1629 | loff_t newsize = pos + len; | 1612 | loff_t newsize = pos + len; |
1630 | 1613 | ||
1631 | if (ocfs2_sparse_alloc(osb)) | 1614 | BUG_ON(ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))); |
1632 | return 0; | ||
1633 | 1615 | ||
1634 | if (newsize <= i_size_read(inode)) | 1616 | if (newsize <= i_size_read(inode)) |
1635 | return 0; | 1617 | return 0; |
1636 | 1618 | ||
1637 | ret = ocfs2_extend_no_holes(inode, newsize, pos); | 1619 | ret = ocfs2_extend_no_holes(inode, di_bh, newsize, pos); |
1638 | if (ret) | 1620 | if (ret) |
1639 | mlog_errno(ret); | 1621 | mlog_errno(ret); |
1640 | 1622 | ||
@@ -1644,6 +1626,18 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos, | |||
1644 | return ret; | 1626 | return ret; |
1645 | } | 1627 | } |
1646 | 1628 | ||
1629 | static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh, | ||
1630 | loff_t pos) | ||
1631 | { | ||
1632 | int ret = 0; | ||
1633 | |||
1634 | BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))); | ||
1635 | if (pos > i_size_read(inode)) | ||
1636 | ret = ocfs2_zero_extend(inode, di_bh, pos); | ||
1637 | |||
1638 | return ret; | ||
1639 | } | ||
1640 | |||
1647 | int ocfs2_write_begin_nolock(struct address_space *mapping, | 1641 | int ocfs2_write_begin_nolock(struct address_space *mapping, |
1648 | loff_t pos, unsigned len, unsigned flags, | 1642 | loff_t pos, unsigned len, unsigned flags, |
1649 | struct page **pagep, void **fsdata, | 1643 | struct page **pagep, void **fsdata, |
@@ -1679,7 +1673,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, | |||
1679 | } | 1673 | } |
1680 | } | 1674 | } |
1681 | 1675 | ||
1682 | ret = ocfs2_expand_nonsparse_inode(inode, pos, len, wc); | 1676 | if (ocfs2_sparse_alloc(osb)) |
1677 | ret = ocfs2_zero_tail(inode, di_bh, pos); | ||
1678 | else | ||
1679 | ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, len, | ||
1680 | wc); | ||
1683 | if (ret) { | 1681 | if (ret) { |
1684 | mlog_errno(ret); | 1682 | mlog_errno(ret); |
1685 | goto out; | 1683 | goto out; |
@@ -1789,7 +1787,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, | |||
1789 | * that we can zero and flush if we error after adding the | 1787 | * that we can zero and flush if we error after adding the |
1790 | * extent. | 1788 | * extent. |
1791 | */ | 1789 | */ |
1792 | ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, | 1790 | ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len, |
1793 | cluster_of_pages, mmap_page); | 1791 | cluster_of_pages, mmap_page); |
1794 | if (ret) { | 1792 | if (ret) { |
1795 | mlog_errno(ret); | 1793 | mlog_errno(ret); |
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 6b5a492e1749..153abb5abef0 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
@@ -1671,7 +1671,7 @@ struct dlm_ctxt * dlm_register_domain(const char *domain, | |||
1671 | struct dlm_ctxt *dlm = NULL; | 1671 | struct dlm_ctxt *dlm = NULL; |
1672 | struct dlm_ctxt *new_ctxt = NULL; | 1672 | struct dlm_ctxt *new_ctxt = NULL; |
1673 | 1673 | ||
1674 | if (strlen(domain) > O2NM_MAX_NAME_LEN) { | 1674 | if (strlen(domain) >= O2NM_MAX_NAME_LEN) { |
1675 | ret = -ENAMETOOLONG; | 1675 | ret = -ENAMETOOLONG; |
1676 | mlog(ML_ERROR, "domain name length too long\n"); | 1676 | mlog(ML_ERROR, "domain name length too long\n"); |
1677 | goto leave; | 1677 | goto leave; |
@@ -1709,6 +1709,7 @@ retry: | |||
1709 | } | 1709 | } |
1710 | 1710 | ||
1711 | if (dlm_protocol_compare(&dlm->fs_locking_proto, fs_proto)) { | 1711 | if (dlm_protocol_compare(&dlm->fs_locking_proto, fs_proto)) { |
1712 | spin_unlock(&dlm_domain_lock); | ||
1712 | mlog(ML_ERROR, | 1713 | mlog(ML_ERROR, |
1713 | "Requested locking protocol version is not " | 1714 | "Requested locking protocol version is not " |
1714 | "compatible with already registered domain " | 1715 | "compatible with already registered domain " |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 4a7506a4e314..94b97fc6a88e 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -2808,14 +2808,8 @@ again: | |||
2808 | mlog(0, "trying again...\n"); | 2808 | mlog(0, "trying again...\n"); |
2809 | goto again; | 2809 | goto again; |
2810 | } | 2810 | } |
2811 | /* now that we are sure the MIGRATING state is there, drop | ||
2812 | * the unneded state which blocked threads trying to DIRTY */ | ||
2813 | spin_lock(&res->spinlock); | ||
2814 | BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY)); | ||
2815 | BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING)); | ||
2816 | res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY; | ||
2817 | spin_unlock(&res->spinlock); | ||
2818 | 2811 | ||
2812 | ret = 0; | ||
2819 | /* did the target go down or die? */ | 2813 | /* did the target go down or die? */ |
2820 | spin_lock(&dlm->spinlock); | 2814 | spin_lock(&dlm->spinlock); |
2821 | if (!test_bit(target, dlm->domain_map)) { | 2815 | if (!test_bit(target, dlm->domain_map)) { |
@@ -2826,9 +2820,21 @@ again: | |||
2826 | spin_unlock(&dlm->spinlock); | 2820 | spin_unlock(&dlm->spinlock); |
2827 | 2821 | ||
2828 | /* | 2822 | /* |
2823 | * if target is down, we need to clear DLM_LOCK_RES_BLOCK_DIRTY for | ||
2824 | * another try; otherwise, we are sure the MIGRATING state is there, | ||
2825 | * drop the unneeded state which blocked threads trying to DIRTY | ||
2826 | */ | ||
2827 | spin_lock(&res->spinlock); | ||
2828 | BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY)); | ||
2829 | res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY; | ||
2830 | if (!ret) | ||
2831 | BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING)); | ||
2832 | spin_unlock(&res->spinlock); | ||
2833 | |||
2834 | /* | ||
2829 | * at this point: | 2835 | * at this point: |
2830 | * | 2836 | * |
2831 | * o the DLM_LOCK_RES_MIGRATING flag is set | 2837 | * o the DLM_LOCK_RES_MIGRATING flag is set if target not down |
2832 | * o there are no pending asts on this lockres | 2838 | * o there are no pending asts on this lockres |
2833 | * o all processes trying to reserve an ast on this | 2839 | * o all processes trying to reserve an ast on this |
2834 | * lockres must wait for the MIGRATING flag to clear | 2840 | * lockres must wait for the MIGRATING flag to clear |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index f8b75ce4be70..9dfaac73b36d 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -463,7 +463,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm) | |||
463 | if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) { | 463 | if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) { |
464 | int bit; | 464 | int bit; |
465 | 465 | ||
466 | bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES+1, 0); | 466 | bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES, 0); |
467 | if (bit >= O2NM_MAX_NODES || bit < 0) | 467 | if (bit >= O2NM_MAX_NODES || bit < 0) |
468 | dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM); | 468 | dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM); |
469 | else | 469 | else |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 6a13ea64c447..2b10b36d1577 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -724,28 +724,55 @@ leave: | |||
724 | return status; | 724 | return status; |
725 | } | 725 | } |
726 | 726 | ||
727 | /* | ||
728 | * While a write will already be ordering the data, a truncate will not. | ||
729 | * Thus, we need to explicitly order the zeroed pages. | ||
730 | */ | ||
731 | static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode) | ||
732 | { | ||
733 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
734 | handle_t *handle = NULL; | ||
735 | int ret = 0; | ||
736 | |||
737 | if (!ocfs2_should_order_data(inode)) | ||
738 | goto out; | ||
739 | |||
740 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | ||
741 | if (IS_ERR(handle)) { | ||
742 | ret = -ENOMEM; | ||
743 | mlog_errno(ret); | ||
744 | goto out; | ||
745 | } | ||
746 | |||
747 | ret = ocfs2_jbd2_file_inode(handle, inode); | ||
748 | if (ret < 0) | ||
749 | mlog_errno(ret); | ||
750 | |||
751 | out: | ||
752 | if (ret) { | ||
753 | if (!IS_ERR(handle)) | ||
754 | ocfs2_commit_trans(osb, handle); | ||
755 | handle = ERR_PTR(ret); | ||
756 | } | ||
757 | return handle; | ||
758 | } | ||
759 | |||
727 | /* Some parts of this taken from generic_cont_expand, which turned out | 760 | /* Some parts of this taken from generic_cont_expand, which turned out |
728 | * to be too fragile to do exactly what we need without us having to | 761 | * to be too fragile to do exactly what we need without us having to |
729 | * worry about recursive locking in ->write_begin() and ->write_end(). */ | 762 | * worry about recursive locking in ->write_begin() and ->write_end(). */ |
730 | static int ocfs2_write_zero_page(struct inode *inode, | 763 | static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, |
731 | u64 size) | 764 | u64 abs_to) |
732 | { | 765 | { |
733 | struct address_space *mapping = inode->i_mapping; | 766 | struct address_space *mapping = inode->i_mapping; |
734 | struct page *page; | 767 | struct page *page; |
735 | unsigned long index; | 768 | unsigned long index = abs_from >> PAGE_CACHE_SHIFT; |
736 | unsigned int offset; | ||
737 | handle_t *handle = NULL; | 769 | handle_t *handle = NULL; |
738 | int ret; | 770 | int ret = 0; |
771 | unsigned zero_from, zero_to, block_start, block_end; | ||
739 | 772 | ||
740 | offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */ | 773 | BUG_ON(abs_from >= abs_to); |
741 | /* ugh. in prepare/commit_write, if from==to==start of block, we | 774 | BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT)); |
742 | ** skip the prepare. make sure we never send an offset for the start | 775 | BUG_ON(abs_from & (inode->i_blkbits - 1)); |
743 | ** of a block | ||
744 | */ | ||
745 | if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) { | ||
746 | offset++; | ||
747 | } | ||
748 | index = size >> PAGE_CACHE_SHIFT; | ||
749 | 776 | ||
750 | page = grab_cache_page(mapping, index); | 777 | page = grab_cache_page(mapping, index); |
751 | if (!page) { | 778 | if (!page) { |
@@ -754,31 +781,56 @@ static int ocfs2_write_zero_page(struct inode *inode, | |||
754 | goto out; | 781 | goto out; |
755 | } | 782 | } |
756 | 783 | ||
757 | ret = ocfs2_prepare_write_nolock(inode, page, offset, offset); | 784 | /* Get the offsets within the page that we want to zero */ |
758 | if (ret < 0) { | 785 | zero_from = abs_from & (PAGE_CACHE_SIZE - 1); |
759 | mlog_errno(ret); | 786 | zero_to = abs_to & (PAGE_CACHE_SIZE - 1); |
760 | goto out_unlock; | 787 | if (!zero_to) |
761 | } | 788 | zero_to = PAGE_CACHE_SIZE; |
762 | 789 | ||
763 | if (ocfs2_should_order_data(inode)) { | 790 | mlog(0, |
764 | handle = ocfs2_start_walk_page_trans(inode, page, offset, | 791 | "abs_from = %llu, abs_to = %llu, index = %lu, zero_from = %u, zero_to = %u\n", |
765 | offset); | 792 | (unsigned long long)abs_from, (unsigned long long)abs_to, |
766 | if (IS_ERR(handle)) { | 793 | index, zero_from, zero_to); |
767 | ret = PTR_ERR(handle); | 794 | |
768 | handle = NULL; | 795 | /* We know that zero_from is block aligned */ |
796 | for (block_start = zero_from; block_start < zero_to; | ||
797 | block_start = block_end) { | ||
798 | block_end = block_start + (1 << inode->i_blkbits); | ||
799 | |||
800 | /* | ||
801 | * block_start is block-aligned. Bump it by one to | ||
802 | * force ocfs2_{prepare,commit}_write() to zero the | ||
803 | * whole block. | ||
804 | */ | ||
805 | ret = ocfs2_prepare_write_nolock(inode, page, | ||
806 | block_start + 1, | ||
807 | block_start + 1); | ||
808 | if (ret < 0) { | ||
809 | mlog_errno(ret); | ||
769 | goto out_unlock; | 810 | goto out_unlock; |
770 | } | 811 | } |
771 | } | ||
772 | 812 | ||
773 | /* must not update i_size! */ | 813 | if (!handle) { |
774 | ret = block_commit_write(page, offset, offset); | 814 | handle = ocfs2_zero_start_ordered_transaction(inode); |
775 | if (ret < 0) | 815 | if (IS_ERR(handle)) { |
776 | mlog_errno(ret); | 816 | ret = PTR_ERR(handle); |
777 | else | 817 | handle = NULL; |
778 | ret = 0; | 818 | break; |
819 | } | ||
820 | } | ||
821 | |||
822 | /* must not update i_size! */ | ||
823 | ret = block_commit_write(page, block_start + 1, | ||
824 | block_start + 1); | ||
825 | if (ret < 0) | ||
826 | mlog_errno(ret); | ||
827 | else | ||
828 | ret = 0; | ||
829 | } | ||
779 | 830 | ||
780 | if (handle) | 831 | if (handle) |
781 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | 832 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); |
833 | |||
782 | out_unlock: | 834 | out_unlock: |
783 | unlock_page(page); | 835 | unlock_page(page); |
784 | page_cache_release(page); | 836 | page_cache_release(page); |
@@ -786,22 +838,114 @@ out: | |||
786 | return ret; | 838 | return ret; |
787 | } | 839 | } |
788 | 840 | ||
789 | static int ocfs2_zero_extend(struct inode *inode, | 841 | /* |
790 | u64 zero_to_size) | 842 | * Find the next range to zero. We do this in terms of bytes because |
843 | * that's what ocfs2_zero_extend() wants, and it is dealing with the | ||
844 | * pagecache. We may return multiple extents. | ||
845 | * | ||
846 | * zero_start and zero_end are ocfs2_zero_extend()s current idea of what | ||
847 | * needs to be zeroed. range_start and range_end return the next zeroing | ||
848 | * range. A subsequent call should pass the previous range_end as its | ||
849 | * zero_start. If range_end is 0, there's nothing to do. | ||
850 | * | ||
851 | * Unwritten extents are skipped over. Refcounted extents are CoWd. | ||
852 | */ | ||
853 | static int ocfs2_zero_extend_get_range(struct inode *inode, | ||
854 | struct buffer_head *di_bh, | ||
855 | u64 zero_start, u64 zero_end, | ||
856 | u64 *range_start, u64 *range_end) | ||
791 | { | 857 | { |
792 | int ret = 0; | 858 | int rc = 0, needs_cow = 0; |
793 | u64 start_off; | 859 | u32 p_cpos, zero_clusters = 0; |
794 | struct super_block *sb = inode->i_sb; | 860 | u32 zero_cpos = |
861 | zero_start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits; | ||
862 | u32 last_cpos = ocfs2_clusters_for_bytes(inode->i_sb, zero_end); | ||
863 | unsigned int num_clusters = 0; | ||
864 | unsigned int ext_flags = 0; | ||
795 | 865 | ||
796 | start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode)); | 866 | while (zero_cpos < last_cpos) { |
797 | while (start_off < zero_to_size) { | 867 | rc = ocfs2_get_clusters(inode, zero_cpos, &p_cpos, |
798 | ret = ocfs2_write_zero_page(inode, start_off); | 868 | &num_clusters, &ext_flags); |
799 | if (ret < 0) { | 869 | if (rc) { |
800 | mlog_errno(ret); | 870 | mlog_errno(rc); |
871 | goto out; | ||
872 | } | ||
873 | |||
874 | if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) { | ||
875 | zero_clusters = num_clusters; | ||
876 | if (ext_flags & OCFS2_EXT_REFCOUNTED) | ||
877 | needs_cow = 1; | ||
878 | break; | ||
879 | } | ||
880 | |||
881 | zero_cpos += num_clusters; | ||
882 | } | ||
883 | if (!zero_clusters) { | ||
884 | *range_end = 0; | ||
885 | goto out; | ||
886 | } | ||
887 | |||
888 | while ((zero_cpos + zero_clusters) < last_cpos) { | ||
889 | rc = ocfs2_get_clusters(inode, zero_cpos + zero_clusters, | ||
890 | &p_cpos, &num_clusters, | ||
891 | &ext_flags); | ||
892 | if (rc) { | ||
893 | mlog_errno(rc); | ||
801 | goto out; | 894 | goto out; |
802 | } | 895 | } |
803 | 896 | ||
804 | start_off += sb->s_blocksize; | 897 | if (!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN)) |
898 | break; | ||
899 | if (ext_flags & OCFS2_EXT_REFCOUNTED) | ||
900 | needs_cow = 1; | ||
901 | zero_clusters += num_clusters; | ||
902 | } | ||
903 | if ((zero_cpos + zero_clusters) > last_cpos) | ||
904 | zero_clusters = last_cpos - zero_cpos; | ||
905 | |||
906 | if (needs_cow) { | ||
907 | rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters, | ||
908 | UINT_MAX); | ||
909 | if (rc) { | ||
910 | mlog_errno(rc); | ||
911 | goto out; | ||
912 | } | ||
913 | } | ||
914 | |||
915 | *range_start = ocfs2_clusters_to_bytes(inode->i_sb, zero_cpos); | ||
916 | *range_end = ocfs2_clusters_to_bytes(inode->i_sb, | ||
917 | zero_cpos + zero_clusters); | ||
918 | |||
919 | out: | ||
920 | return rc; | ||
921 | } | ||
922 | |||
923 | /* | ||
924 | * Zero one range returned from ocfs2_zero_extend_get_range(). The caller | ||
925 | * has made sure that the entire range needs zeroing. | ||
926 | */ | ||
927 | static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start, | ||
928 | u64 range_end) | ||
929 | { | ||
930 | int rc = 0; | ||
931 | u64 next_pos; | ||
932 | u64 zero_pos = range_start; | ||
933 | |||
934 | mlog(0, "range_start = %llu, range_end = %llu\n", | ||
935 | (unsigned long long)range_start, | ||
936 | (unsigned long long)range_end); | ||
937 | BUG_ON(range_start >= range_end); | ||
938 | |||
939 | while (zero_pos < range_end) { | ||
940 | next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE; | ||
941 | if (next_pos > range_end) | ||
942 | next_pos = range_end; | ||
943 | rc = ocfs2_write_zero_page(inode, zero_pos, next_pos); | ||
944 | if (rc < 0) { | ||
945 | mlog_errno(rc); | ||
946 | break; | ||
947 | } | ||
948 | zero_pos = next_pos; | ||
805 | 949 | ||
806 | /* | 950 | /* |
807 | * Very large extends have the potential to lock up | 951 | * Very large extends have the potential to lock up |
@@ -810,16 +954,63 @@ static int ocfs2_zero_extend(struct inode *inode, | |||
810 | cond_resched(); | 954 | cond_resched(); |
811 | } | 955 | } |
812 | 956 | ||
813 | out: | 957 | return rc; |
958 | } | ||
959 | |||
960 | int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, | ||
961 | loff_t zero_to_size) | ||
962 | { | ||
963 | int ret = 0; | ||
964 | u64 zero_start, range_start = 0, range_end = 0; | ||
965 | struct super_block *sb = inode->i_sb; | ||
966 | |||
967 | zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode)); | ||
968 | mlog(0, "zero_start %llu for i_size %llu\n", | ||
969 | (unsigned long long)zero_start, | ||
970 | (unsigned long long)i_size_read(inode)); | ||
971 | while (zero_start < zero_to_size) { | ||
972 | ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start, | ||
973 | zero_to_size, | ||
974 | &range_start, | ||
975 | &range_end); | ||
976 | if (ret) { | ||
977 | mlog_errno(ret); | ||
978 | break; | ||
979 | } | ||
980 | if (!range_end) | ||
981 | break; | ||
982 | /* Trim the ends */ | ||
983 | if (range_start < zero_start) | ||
984 | range_start = zero_start; | ||
985 | if (range_end > zero_to_size) | ||
986 | range_end = zero_to_size; | ||
987 | |||
988 | ret = ocfs2_zero_extend_range(inode, range_start, | ||
989 | range_end); | ||
990 | if (ret) { | ||
991 | mlog_errno(ret); | ||
992 | break; | ||
993 | } | ||
994 | zero_start = range_end; | ||
995 | } | ||
996 | |||
814 | return ret; | 997 | return ret; |
815 | } | 998 | } |
816 | 999 | ||
817 | int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to) | 1000 | int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh, |
1001 | u64 new_i_size, u64 zero_to) | ||
818 | { | 1002 | { |
819 | int ret; | 1003 | int ret; |
820 | u32 clusters_to_add; | 1004 | u32 clusters_to_add; |
821 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 1005 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
822 | 1006 | ||
1007 | /* | ||
1008 | * Only quota files call this without a bh, and they can't be | ||
1009 | * refcounted. | ||
1010 | */ | ||
1011 | BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); | ||
1012 | BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE)); | ||
1013 | |||
823 | clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size); | 1014 | clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size); |
824 | if (clusters_to_add < oi->ip_clusters) | 1015 | if (clusters_to_add < oi->ip_clusters) |
825 | clusters_to_add = 0; | 1016 | clusters_to_add = 0; |
@@ -840,7 +1031,7 @@ int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to) | |||
840 | * still need to zero the area between the old i_size and the | 1031 | * still need to zero the area between the old i_size and the |
841 | * new i_size. | 1032 | * new i_size. |
842 | */ | 1033 | */ |
843 | ret = ocfs2_zero_extend(inode, zero_to); | 1034 | ret = ocfs2_zero_extend(inode, di_bh, zero_to); |
844 | if (ret < 0) | 1035 | if (ret < 0) |
845 | mlog_errno(ret); | 1036 | mlog_errno(ret); |
846 | 1037 | ||
@@ -862,27 +1053,15 @@ static int ocfs2_extend_file(struct inode *inode, | |||
862 | goto out; | 1053 | goto out; |
863 | 1054 | ||
864 | if (i_size_read(inode) == new_i_size) | 1055 | if (i_size_read(inode) == new_i_size) |
865 | goto out; | 1056 | goto out; |
866 | BUG_ON(new_i_size < i_size_read(inode)); | 1057 | BUG_ON(new_i_size < i_size_read(inode)); |
867 | 1058 | ||
868 | /* | 1059 | /* |
869 | * Fall through for converting inline data, even if the fs | ||
870 | * supports sparse files. | ||
871 | * | ||
872 | * The check for inline data here is legal - nobody can add | ||
873 | * the feature since we have i_mutex. We must check it again | ||
874 | * after acquiring ip_alloc_sem though, as paths like mmap | ||
875 | * might have raced us to converting the inode to extents. | ||
876 | */ | ||
877 | if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) | ||
878 | && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) | ||
879 | goto out_update_size; | ||
880 | |||
881 | /* | ||
882 | * The alloc sem blocks people in read/write from reading our | 1060 | * The alloc sem blocks people in read/write from reading our |
883 | * allocation until we're done changing it. We depend on | 1061 | * allocation until we're done changing it. We depend on |
884 | * i_mutex to block other extend/truncate calls while we're | 1062 | * i_mutex to block other extend/truncate calls while we're |
885 | * here. | 1063 | * here. We even have to hold it for sparse files because there |
1064 | * might be some tail zeroing. | ||
886 | */ | 1065 | */ |
887 | down_write(&oi->ip_alloc_sem); | 1066 | down_write(&oi->ip_alloc_sem); |
888 | 1067 | ||
@@ -899,14 +1078,16 @@ static int ocfs2_extend_file(struct inode *inode, | |||
899 | ret = ocfs2_convert_inline_data_to_extents(inode, di_bh); | 1078 | ret = ocfs2_convert_inline_data_to_extents(inode, di_bh); |
900 | if (ret) { | 1079 | if (ret) { |
901 | up_write(&oi->ip_alloc_sem); | 1080 | up_write(&oi->ip_alloc_sem); |
902 | |||
903 | mlog_errno(ret); | 1081 | mlog_errno(ret); |
904 | goto out; | 1082 | goto out; |
905 | } | 1083 | } |
906 | } | 1084 | } |
907 | 1085 | ||
908 | if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) | 1086 | if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) |
909 | ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size); | 1087 | ret = ocfs2_zero_extend(inode, di_bh, new_i_size); |
1088 | else | ||
1089 | ret = ocfs2_extend_no_holes(inode, di_bh, new_i_size, | ||
1090 | new_i_size); | ||
910 | 1091 | ||
911 | up_write(&oi->ip_alloc_sem); | 1092 | up_write(&oi->ip_alloc_sem); |
912 | 1093 | ||
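The ocfs2_zero_extend() rework above boils down to a range walk: find the next allocated range past the old i_size, clip it to [zero_start, zero_to_size), zero it, and advance. A minimal userspace sketch of that control flow, assuming hypothetical get_range()/zero_range() helpers in place of ocfs2_zero_extend_get_range() and ocfs2_zero_extend_range():

#include <stdio.h>
#include <stdint.h>

/* Hypothetical stand-ins for the ocfs2 helpers; here every "allocation"
 * is a fixed-size chunk so the example runs without a filesystem. */
static int get_range(uint64_t start, uint64_t end,
                     uint64_t *r_start, uint64_t *r_end)
{
	const uint64_t chunk = 4096;
	if (start >= end) {               /* nothing left to look at */
		*r_start = *r_end = 0;
		return 0;
	}
	*r_start = start - (start % chunk);
	*r_end = *r_start + chunk;
	return 0;
}

static int zero_range(uint64_t start, uint64_t end)
{
	printf("zeroing [%llu, %llu)\n",
	       (unsigned long long)start, (unsigned long long)end);
	return 0;
}

/* Walk [zero_start, zero_to) the way the new ocfs2_zero_extend() does:
 * fetch a candidate range, trim both ends, zero it, then advance. */
static int zero_extend(uint64_t zero_start, uint64_t zero_to)
{
	uint64_t range_start = 0, range_end = 0;
	int ret = 0;

	while (zero_start < zero_to) {
		ret = get_range(zero_start, zero_to, &range_start, &range_end);
		if (ret)
			break;
		if (!range_end)
			break;
		if (range_start < zero_start)
			range_start = zero_start;
		if (range_end > zero_to)
			range_end = zero_to;
		ret = zero_range(range_start, range_end);
		if (ret)
			break;
		zero_start = range_end;
	}
	return ret;
}

int main(void)
{
	return zero_extend(1000, 10000);
}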
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index d66cf4f7c70e..97bf761c9e7c 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h | |||
@@ -54,8 +54,10 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb, | |||
54 | int ocfs2_simple_size_update(struct inode *inode, | 54 | int ocfs2_simple_size_update(struct inode *inode, |
55 | struct buffer_head *di_bh, | 55 | struct buffer_head *di_bh, |
56 | u64 new_i_size); | 56 | u64 new_i_size); |
57 | int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, | 57 | int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh, |
58 | u64 zero_to); | 58 | u64 new_i_size, u64 zero_to); |
59 | int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, | ||
60 | loff_t zero_to); | ||
59 | int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); | 61 | int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); |
60 | int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, | 62 | int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, |
61 | struct kstat *stat); | 63 | struct kstat *stat); |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 47878cf16418..625de9d7088c 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -472,7 +472,7 @@ static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger | |||
472 | return container_of(triggers, struct ocfs2_triggers, ot_triggers); | 472 | return container_of(triggers, struct ocfs2_triggers, ot_triggers); |
473 | } | 473 | } |
474 | 474 | ||
475 | static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers, | 475 | static void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers, |
476 | struct buffer_head *bh, | 476 | struct buffer_head *bh, |
477 | void *data, size_t size) | 477 | void *data, size_t size) |
478 | { | 478 | { |
@@ -491,7 +491,7 @@ static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers, | |||
491 | * Quota blocks have their own trigger because the struct ocfs2_block_check | 491 | * Quota blocks have their own trigger because the struct ocfs2_block_check |
492 | * offset depends on the blocksize. | 492 | * offset depends on the blocksize. |
493 | */ | 493 | */ |
494 | static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers, | 494 | static void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers, |
495 | struct buffer_head *bh, | 495 | struct buffer_head *bh, |
496 | void *data, size_t size) | 496 | void *data, size_t size) |
497 | { | 497 | { |
@@ -511,7 +511,7 @@ static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers, | |||
511 | * Directory blocks also have their own trigger because the | 511 | * Directory blocks also have their own trigger because the |
512 | * struct ocfs2_block_check offset depends on the blocksize. | 512 | * struct ocfs2_block_check offset depends on the blocksize. |
513 | */ | 513 | */ |
514 | static void ocfs2_db_commit_trigger(struct jbd2_buffer_trigger_type *triggers, | 514 | static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers, |
515 | struct buffer_head *bh, | 515 | struct buffer_head *bh, |
516 | void *data, size_t size) | 516 | void *data, size_t size) |
517 | { | 517 | { |
@@ -544,7 +544,7 @@ static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers, | |||
544 | 544 | ||
545 | static struct ocfs2_triggers di_triggers = { | 545 | static struct ocfs2_triggers di_triggers = { |
546 | .ot_triggers = { | 546 | .ot_triggers = { |
547 | .t_commit = ocfs2_commit_trigger, | 547 | .t_frozen = ocfs2_frozen_trigger, |
548 | .t_abort = ocfs2_abort_trigger, | 548 | .t_abort = ocfs2_abort_trigger, |
549 | }, | 549 | }, |
550 | .ot_offset = offsetof(struct ocfs2_dinode, i_check), | 550 | .ot_offset = offsetof(struct ocfs2_dinode, i_check), |
@@ -552,7 +552,7 @@ static struct ocfs2_triggers di_triggers = { | |||
552 | 552 | ||
553 | static struct ocfs2_triggers eb_triggers = { | 553 | static struct ocfs2_triggers eb_triggers = { |
554 | .ot_triggers = { | 554 | .ot_triggers = { |
555 | .t_commit = ocfs2_commit_trigger, | 555 | .t_frozen = ocfs2_frozen_trigger, |
556 | .t_abort = ocfs2_abort_trigger, | 556 | .t_abort = ocfs2_abort_trigger, |
557 | }, | 557 | }, |
558 | .ot_offset = offsetof(struct ocfs2_extent_block, h_check), | 558 | .ot_offset = offsetof(struct ocfs2_extent_block, h_check), |
@@ -560,7 +560,7 @@ static struct ocfs2_triggers eb_triggers = { | |||
560 | 560 | ||
561 | static struct ocfs2_triggers rb_triggers = { | 561 | static struct ocfs2_triggers rb_triggers = { |
562 | .ot_triggers = { | 562 | .ot_triggers = { |
563 | .t_commit = ocfs2_commit_trigger, | 563 | .t_frozen = ocfs2_frozen_trigger, |
564 | .t_abort = ocfs2_abort_trigger, | 564 | .t_abort = ocfs2_abort_trigger, |
565 | }, | 565 | }, |
566 | .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check), | 566 | .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check), |
@@ -568,7 +568,7 @@ static struct ocfs2_triggers rb_triggers = { | |||
568 | 568 | ||
569 | static struct ocfs2_triggers gd_triggers = { | 569 | static struct ocfs2_triggers gd_triggers = { |
570 | .ot_triggers = { | 570 | .ot_triggers = { |
571 | .t_commit = ocfs2_commit_trigger, | 571 | .t_frozen = ocfs2_frozen_trigger, |
572 | .t_abort = ocfs2_abort_trigger, | 572 | .t_abort = ocfs2_abort_trigger, |
573 | }, | 573 | }, |
574 | .ot_offset = offsetof(struct ocfs2_group_desc, bg_check), | 574 | .ot_offset = offsetof(struct ocfs2_group_desc, bg_check), |
@@ -576,14 +576,14 @@ static struct ocfs2_triggers gd_triggers = { | |||
576 | 576 | ||
577 | static struct ocfs2_triggers db_triggers = { | 577 | static struct ocfs2_triggers db_triggers = { |
578 | .ot_triggers = { | 578 | .ot_triggers = { |
579 | .t_commit = ocfs2_db_commit_trigger, | 579 | .t_frozen = ocfs2_db_frozen_trigger, |
580 | .t_abort = ocfs2_abort_trigger, | 580 | .t_abort = ocfs2_abort_trigger, |
581 | }, | 581 | }, |
582 | }; | 582 | }; |
583 | 583 | ||
584 | static struct ocfs2_triggers xb_triggers = { | 584 | static struct ocfs2_triggers xb_triggers = { |
585 | .ot_triggers = { | 585 | .ot_triggers = { |
586 | .t_commit = ocfs2_commit_trigger, | 586 | .t_frozen = ocfs2_frozen_trigger, |
587 | .t_abort = ocfs2_abort_trigger, | 587 | .t_abort = ocfs2_abort_trigger, |
588 | }, | 588 | }, |
589 | .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check), | 589 | .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check), |
@@ -591,14 +591,14 @@ static struct ocfs2_triggers xb_triggers = { | |||
591 | 591 | ||
592 | static struct ocfs2_triggers dq_triggers = { | 592 | static struct ocfs2_triggers dq_triggers = { |
593 | .ot_triggers = { | 593 | .ot_triggers = { |
594 | .t_commit = ocfs2_dq_commit_trigger, | 594 | .t_frozen = ocfs2_dq_frozen_trigger, |
595 | .t_abort = ocfs2_abort_trigger, | 595 | .t_abort = ocfs2_abort_trigger, |
596 | }, | 596 | }, |
597 | }; | 597 | }; |
598 | 598 | ||
599 | static struct ocfs2_triggers dr_triggers = { | 599 | static struct ocfs2_triggers dr_triggers = { |
600 | .ot_triggers = { | 600 | .ot_triggers = { |
601 | .t_commit = ocfs2_commit_trigger, | 601 | .t_frozen = ocfs2_frozen_trigger, |
602 | .t_abort = ocfs2_abort_trigger, | 602 | .t_abort = ocfs2_abort_trigger, |
603 | }, | 603 | }, |
604 | .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check), | 604 | .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check), |
@@ -606,7 +606,7 @@ static struct ocfs2_triggers dr_triggers = { | |||
606 | 606 | ||
607 | static struct ocfs2_triggers dl_triggers = { | 607 | static struct ocfs2_triggers dl_triggers = { |
608 | .ot_triggers = { | 608 | .ot_triggers = { |
609 | .t_commit = ocfs2_commit_trigger, | 609 | .t_frozen = ocfs2_frozen_trigger, |
610 | .t_abort = ocfs2_abort_trigger, | 610 | .t_abort = ocfs2_abort_trigger, |
611 | }, | 611 | }, |
612 | .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check), | 612 | .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check), |
@@ -1936,7 +1936,7 @@ void ocfs2_orphan_scan_work(struct work_struct *work) | |||
1936 | mutex_lock(&os->os_lock); | 1936 | mutex_lock(&os->os_lock); |
1937 | ocfs2_queue_orphan_scan(osb); | 1937 | ocfs2_queue_orphan_scan(osb); |
1938 | if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) | 1938 | if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) |
1939 | schedule_delayed_work(&os->os_orphan_scan_work, | 1939 | queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work, |
1940 | ocfs2_orphan_scan_timeout()); | 1940 | ocfs2_orphan_scan_timeout()); |
1941 | mutex_unlock(&os->os_lock); | 1941 | mutex_unlock(&os->os_lock); |
1942 | } | 1942 | } |
@@ -1976,8 +1976,8 @@ void ocfs2_orphan_scan_start(struct ocfs2_super *osb) | |||
1976 | atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); | 1976 | atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); |
1977 | else { | 1977 | else { |
1978 | atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE); | 1978 | atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE); |
1979 | schedule_delayed_work(&os->os_orphan_scan_work, | 1979 | queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work, |
1980 | ocfs2_orphan_scan_timeout()); | 1980 | ocfs2_orphan_scan_timeout()); |
1981 | } | 1981 | } |
1982 | } | 1982 | } |
1983 | 1983 | ||
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 3d7419682dc0..ec6adbf8f551 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c | |||
@@ -118,6 +118,7 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb) | |||
118 | { | 118 | { |
119 | unsigned int la_mb; | 119 | unsigned int la_mb; |
120 | unsigned int gd_mb; | 120 | unsigned int gd_mb; |
121 | unsigned int la_max_mb; | ||
121 | unsigned int megs_per_slot; | 122 | unsigned int megs_per_slot; |
122 | struct super_block *sb = osb->sb; | 123 | struct super_block *sb = osb->sb; |
123 | 124 | ||
@@ -182,6 +183,12 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb) | |||
182 | if (megs_per_slot < la_mb) | 183 | if (megs_per_slot < la_mb) |
183 | la_mb = megs_per_slot; | 184 | la_mb = megs_per_slot; |
184 | 185 | ||
186 | /* We can't store more bits than we can in a block. */ | ||
187 | la_max_mb = ocfs2_clusters_to_megabytes(osb->sb, | ||
188 | ocfs2_local_alloc_size(sb) * 8); | ||
189 | if (la_mb > la_max_mb) | ||
190 | la_mb = la_max_mb; | ||
191 | |||
185 | return la_mb; | 192 | return la_mb; |
186 | } | 193 | } |
187 | 194 | ||
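The localalloc.c hunk caps the default local alloc window at what a single bitmap block can describe: one bit per cluster, so at most block_bytes * 8 clusters. A toy version of that clamp, using assumed block and cluster sizes rather than real ocfs2 defaults:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Assumed sizes, not ocfs2 defaults: 4 KiB bitmap block, 32 KiB cluster. */
	uint64_t bitmap_bytes = 4096;              /* ocfs2_local_alloc_size() stand-in */
	uint64_t cluster_bytes = 32 * 1024;
	uint64_t max_clusters = bitmap_bytes * 8;  /* one bit per cluster */
	uint64_t la_max_mb = max_clusters * cluster_bytes / (1024 * 1024);

	uint64_t la_mb = 2048;                     /* whatever the earlier heuristics picked */
	if (la_mb > la_max_mb)
		la_mb = la_max_mb;                 /* the clamp added in the hunk above */

	printf("la_max_mb = %llu MB, la_mb = %llu MB\n",
	       (unsigned long long)la_max_mb, (unsigned long long)la_mb);
	return 0;
}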
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 2bb35fe00511..4607923eb24c 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c | |||
@@ -775,7 +775,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot) | |||
775 | * locking allocators ranks above a transaction start | 775 | * locking allocators ranks above a transaction start |
776 | */ | 776 | */ |
777 | WARN_ON(journal_current_handle()); | 777 | WARN_ON(journal_current_handle()); |
778 | status = ocfs2_extend_no_holes(gqinode, | 778 | status = ocfs2_extend_no_holes(gqinode, NULL, |
779 | gqinode->i_size + (need_alloc << sb->s_blocksize_bits), | 779 | gqinode->i_size + (need_alloc << sb->s_blocksize_bits), |
780 | gqinode->i_size); | 780 | gqinode->i_size); |
781 | if (status < 0) | 781 | if (status < 0) |
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 8bd70d4d184d..dc78764ccc4c 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c | |||
@@ -971,7 +971,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( | |||
971 | u64 p_blkno; | 971 | u64 p_blkno; |
972 | 972 | ||
973 | /* We are protected by dqio_sem so no locking needed */ | 973 | /* We are protected by dqio_sem so no locking needed */ |
974 | status = ocfs2_extend_no_holes(lqinode, | 974 | status = ocfs2_extend_no_holes(lqinode, NULL, |
975 | lqinode->i_size + 2 * sb->s_blocksize, | 975 | lqinode->i_size + 2 * sb->s_blocksize, |
976 | lqinode->i_size); | 976 | lqinode->i_size); |
977 | if (status < 0) { | 977 | if (status < 0) { |
@@ -1114,7 +1114,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file( | |||
1114 | return ocfs2_local_quota_add_chunk(sb, type, offset); | 1114 | return ocfs2_local_quota_add_chunk(sb, type, offset); |
1115 | 1115 | ||
1116 | /* We are protected by dqio_sem so no locking needed */ | 1116 | /* We are protected by dqio_sem so no locking needed */ |
1117 | status = ocfs2_extend_no_holes(lqinode, | 1117 | status = ocfs2_extend_no_holes(lqinode, NULL, |
1118 | lqinode->i_size + sb->s_blocksize, | 1118 | lqinode->i_size + sb->s_blocksize, |
1119 | lqinode->i_size); | 1119 | lqinode->i_size); |
1120 | if (status < 0) { | 1120 | if (status < 0) { |
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 4793f36f6518..3ac5aa733e9c 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
@@ -2931,6 +2931,12 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
2931 | 2931 | ||
2932 | offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; | 2932 | offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; |
2933 | end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits); | 2933 | end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits); |
2934 | /* | ||
2935 | * We only duplicate pages until we reach the page that contains i_size - 1. | ||
2936 | * So trim 'end' to i_size. | ||
2937 | */ | ||
2938 | if (end > i_size_read(context->inode)) | ||
2939 | end = i_size_read(context->inode); | ||
2934 | 2940 | ||
2935 | while (offset < end) { | 2941 | while (offset < end) { |
2936 | page_index = offset >> PAGE_CACHE_SHIFT; | 2942 | page_index = offset >> PAGE_CACHE_SHIFT; |
@@ -4166,6 +4172,12 @@ static int __ocfs2_reflink(struct dentry *old_dentry, | |||
4166 | struct inode *inode = old_dentry->d_inode; | 4172 | struct inode *inode = old_dentry->d_inode; |
4167 | struct buffer_head *new_bh = NULL; | 4173 | struct buffer_head *new_bh = NULL; |
4168 | 4174 | ||
4175 | if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) { | ||
4176 | ret = -EINVAL; | ||
4177 | mlog_errno(ret); | ||
4178 | goto out; | ||
4179 | } | ||
4180 | |||
4169 | ret = filemap_fdatawrite(inode->i_mapping); | 4181 | ret = filemap_fdatawrite(inode->i_mapping); |
4170 | if (ret) { | 4182 | if (ret) { |
4171 | mlog_errno(ret); | 4183 | mlog_errno(ret); |
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index f4c2a9eb8c4d..a8e6a95a353f 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -741,7 +741,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | |||
741 | le16_to_cpu(bg->bg_free_bits_count)); | 741 | le16_to_cpu(bg->bg_free_bits_count)); |
742 | le32_add_cpu(&cl->cl_recs[alloc_rec].c_total, | 742 | le32_add_cpu(&cl->cl_recs[alloc_rec].c_total, |
743 | le16_to_cpu(bg->bg_bits)); | 743 | le16_to_cpu(bg->bg_bits)); |
744 | cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg->bg_blkno); | 744 | cl->cl_recs[alloc_rec].c_blkno = bg->bg_blkno; |
745 | if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count)) | 745 | if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count)) |
746 | le16_add_cpu(&cl->cl_next_free_rec, 1); | 746 | le16_add_cpu(&cl->cl_next_free_rec, 1); |
747 | 747 | ||
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index e97b34842cfe..d03469f61801 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
@@ -709,7 +709,7 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode, | |||
709 | struct ocfs2_xattr_value_buf *vb, | 709 | struct ocfs2_xattr_value_buf *vb, |
710 | struct ocfs2_xattr_set_ctxt *ctxt) | 710 | struct ocfs2_xattr_set_ctxt *ctxt) |
711 | { | 711 | { |
712 | int status = 0; | 712 | int status = 0, credits; |
713 | handle_t *handle = ctxt->handle; | 713 | handle_t *handle = ctxt->handle; |
714 | enum ocfs2_alloc_restarted why; | 714 | enum ocfs2_alloc_restarted why; |
715 | u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); | 715 | u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); |
@@ -719,38 +719,54 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode, | |||
719 | 719 | ||
720 | ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); | 720 | ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); |
721 | 721 | ||
722 | status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, | 722 | while (clusters_to_add) { |
723 | OCFS2_JOURNAL_ACCESS_WRITE); | 723 | status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, |
724 | if (status < 0) { | 724 | OCFS2_JOURNAL_ACCESS_WRITE); |
725 | mlog_errno(status); | 725 | if (status < 0) { |
726 | goto leave; | 726 | mlog_errno(status); |
727 | } | 727 | break; |
728 | } | ||
728 | 729 | ||
729 | prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); | 730 | prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); |
730 | status = ocfs2_add_clusters_in_btree(handle, | 731 | status = ocfs2_add_clusters_in_btree(handle, |
731 | &et, | 732 | &et, |
732 | &logical_start, | 733 | &logical_start, |
733 | clusters_to_add, | 734 | clusters_to_add, |
734 | 0, | 735 | 0, |
735 | ctxt->data_ac, | 736 | ctxt->data_ac, |
736 | ctxt->meta_ac, | 737 | ctxt->meta_ac, |
737 | &why); | 738 | &why); |
738 | if (status < 0) { | 739 | if ((status < 0) && (status != -EAGAIN)) { |
739 | mlog_errno(status); | 740 | if (status != -ENOSPC) |
740 | goto leave; | 741 | mlog_errno(status); |
741 | } | 742 | break; |
743 | } | ||
742 | 744 | ||
743 | ocfs2_journal_dirty(handle, vb->vb_bh); | 745 | ocfs2_journal_dirty(handle, vb->vb_bh); |
744 | 746 | ||
745 | clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters; | 747 | clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - |
748 | prev_clusters; | ||
746 | 749 | ||
747 | /* | 750 | if (why != RESTART_NONE && clusters_to_add) { |
748 | * We should have already allocated enough space before the transaction, | 751 | /* |
749 | * so no need to restart. | 752 | * We can only fail in case the alloc file doesn't give |
750 | */ | 753 | * up enough clusters. |
751 | BUG_ON(why != RESTART_NONE || clusters_to_add); | 754 | */ |
752 | 755 | BUG_ON(why == RESTART_META); | |
753 | leave: | 756 | |
757 | mlog(0, "restarting xattr value extension for %u" | ||
758 | " clusters,.\n", clusters_to_add); | ||
759 | credits = ocfs2_calc_extend_credits(inode->i_sb, | ||
760 | &vb->vb_xv->xr_list, | ||
761 | clusters_to_add); | ||
762 | status = ocfs2_extend_trans(handle, credits); | ||
763 | if (status < 0) { | ||
764 | status = -ENOMEM; | ||
765 | mlog_errno(status); | ||
766 | break; | ||
767 | } | ||
768 | } | ||
769 | } | ||
754 | 770 | ||
755 | return status; | 771 | return status; |
756 | } | 772 | } |
@@ -6788,16 +6804,15 @@ out: | |||
6788 | return ret; | 6804 | return ret; |
6789 | } | 6805 | } |
6790 | 6806 | ||
6791 | static int ocfs2_reflink_xattr_buckets(handle_t *handle, | 6807 | static int ocfs2_reflink_xattr_bucket(handle_t *handle, |
6792 | u64 blkno, u64 new_blkno, u32 clusters, | 6808 | u64 blkno, u64 new_blkno, u32 clusters, |
6809 | u32 *cpos, int num_buckets, | ||
6793 | struct ocfs2_alloc_context *meta_ac, | 6810 | struct ocfs2_alloc_context *meta_ac, |
6794 | struct ocfs2_alloc_context *data_ac, | 6811 | struct ocfs2_alloc_context *data_ac, |
6795 | struct ocfs2_reflink_xattr_tree_args *args) | 6812 | struct ocfs2_reflink_xattr_tree_args *args) |
6796 | { | 6813 | { |
6797 | int i, j, ret = 0; | 6814 | int i, j, ret = 0; |
6798 | struct super_block *sb = args->reflink->old_inode->i_sb; | 6815 | struct super_block *sb = args->reflink->old_inode->i_sb; |
6799 | u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb)); | ||
6800 | u32 num_buckets = clusters * bpc; | ||
6801 | int bpb = args->old_bucket->bu_blocks; | 6816 | int bpb = args->old_bucket->bu_blocks; |
6802 | struct ocfs2_xattr_value_buf vb = { | 6817 | struct ocfs2_xattr_value_buf vb = { |
6803 | .vb_access = ocfs2_journal_access, | 6818 | .vb_access = ocfs2_journal_access, |
@@ -6816,14 +6831,6 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle, | |||
6816 | break; | 6831 | break; |
6817 | } | 6832 | } |
6818 | 6833 | ||
6819 | /* | ||
6820 | * The real bucket num in this series of blocks is stored | ||
6821 | * in the 1st bucket. | ||
6822 | */ | ||
6823 | if (i == 0) | ||
6824 | num_buckets = le16_to_cpu( | ||
6825 | bucket_xh(args->old_bucket)->xh_num_buckets); | ||
6826 | |||
6827 | ret = ocfs2_xattr_bucket_journal_access(handle, | 6834 | ret = ocfs2_xattr_bucket_journal_access(handle, |
6828 | args->new_bucket, | 6835 | args->new_bucket, |
6829 | OCFS2_JOURNAL_ACCESS_CREATE); | 6836 | OCFS2_JOURNAL_ACCESS_CREATE); |
@@ -6837,6 +6844,18 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle, | |||
6837 | bucket_block(args->old_bucket, j), | 6844 | bucket_block(args->old_bucket, j), |
6838 | sb->s_blocksize); | 6845 | sb->s_blocksize); |
6839 | 6846 | ||
6847 | /* | ||
6848 | * Record the start cpos so that we can use it to initialize | ||
6849 | * our xattr tree. We also set the xh_num_buckets for the new | ||
6850 | * bucket. | ||
6851 | */ | ||
6852 | if (i == 0) { | ||
6853 | *cpos = le32_to_cpu(bucket_xh(args->new_bucket)-> | ||
6854 | xh_entries[0].xe_name_hash); | ||
6855 | bucket_xh(args->new_bucket)->xh_num_buckets = | ||
6856 | cpu_to_le16(num_buckets); | ||
6857 | } | ||
6858 | |||
6840 | ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); | 6859 | ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); |
6841 | 6860 | ||
6842 | ret = ocfs2_reflink_xattr_header(handle, args->reflink, | 6861 | ret = ocfs2_reflink_xattr_header(handle, args->reflink, |
@@ -6866,6 +6885,7 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle, | |||
6866 | } | 6885 | } |
6867 | 6886 | ||
6868 | ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); | 6887 | ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); |
6888 | |||
6869 | ocfs2_xattr_bucket_relse(args->old_bucket); | 6889 | ocfs2_xattr_bucket_relse(args->old_bucket); |
6870 | ocfs2_xattr_bucket_relse(args->new_bucket); | 6890 | ocfs2_xattr_bucket_relse(args->new_bucket); |
6871 | } | 6891 | } |
@@ -6874,6 +6894,75 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle, | |||
6874 | ocfs2_xattr_bucket_relse(args->new_bucket); | 6894 | ocfs2_xattr_bucket_relse(args->new_bucket); |
6875 | return ret; | 6895 | return ret; |
6876 | } | 6896 | } |
6897 | |||
6898 | static int ocfs2_reflink_xattr_buckets(handle_t *handle, | ||
6899 | struct inode *inode, | ||
6900 | struct ocfs2_reflink_xattr_tree_args *args, | ||
6901 | struct ocfs2_extent_tree *et, | ||
6902 | struct ocfs2_alloc_context *meta_ac, | ||
6903 | struct ocfs2_alloc_context *data_ac, | ||
6904 | u64 blkno, u32 cpos, u32 len) | ||
6905 | { | ||
6906 | int ret, first_inserted = 0; | ||
6907 | u32 p_cluster, num_clusters, reflink_cpos = 0; | ||
6908 | u64 new_blkno; | ||
6909 | unsigned int num_buckets, reflink_buckets; | ||
6910 | unsigned int bpc = | ||
6911 | ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); | ||
6912 | |||
6913 | ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); | ||
6914 | if (ret) { | ||
6915 | mlog_errno(ret); | ||
6916 | goto out; | ||
6917 | } | ||
6918 | num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets); | ||
6919 | ocfs2_xattr_bucket_relse(args->old_bucket); | ||
6920 | |||
6921 | while (len && num_buckets) { | ||
6922 | ret = ocfs2_claim_clusters(handle, data_ac, | ||
6923 | 1, &p_cluster, &num_clusters); | ||
6924 | if (ret) { | ||
6925 | mlog_errno(ret); | ||
6926 | goto out; | ||
6927 | } | ||
6928 | |||
6929 | new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); | ||
6930 | reflink_buckets = min(num_buckets, bpc * num_clusters); | ||
6931 | |||
6932 | ret = ocfs2_reflink_xattr_bucket(handle, blkno, | ||
6933 | new_blkno, num_clusters, | ||
6934 | &reflink_cpos, reflink_buckets, | ||
6935 | meta_ac, data_ac, args); | ||
6936 | if (ret) { | ||
6937 | mlog_errno(ret); | ||
6938 | goto out; | ||
6939 | } | ||
6940 | |||
6941 | /* | ||
6942 | * For the 1st allocated cluster, we make it use the same cpos | ||
6943 | * so that the xattr tree looks the same as the original one | ||
6944 | * in most cases. | ||
6945 | */ | ||
6946 | if (!first_inserted) { | ||
6947 | reflink_cpos = cpos; | ||
6948 | first_inserted = 1; | ||
6949 | } | ||
6950 | ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno, | ||
6951 | num_clusters, 0, meta_ac); | ||
6952 | if (ret) | ||
6953 | mlog_errno(ret); | ||
6954 | |||
6955 | mlog(0, "insert new xattr extent rec start %llu len %u to %u\n", | ||
6956 | (unsigned long long)new_blkno, num_clusters, reflink_cpos); | ||
6957 | |||
6958 | len -= num_clusters; | ||
6959 | blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters); | ||
6960 | num_buckets -= reflink_buckets; | ||
6961 | } | ||
6962 | out: | ||
6963 | return ret; | ||
6964 | } | ||
6965 | |||
6877 | /* | 6966 | /* |
6878 | * Create the same xattr extent record in the new inode's xattr tree. | 6967 | * Create the same xattr extent record in the new inode's xattr tree. |
6879 | */ | 6968 | */ |
@@ -6885,8 +6974,6 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode, | |||
6885 | void *para) | 6974 | void *para) |
6886 | { | 6975 | { |
6887 | int ret, credits = 0; | 6976 | int ret, credits = 0; |
6888 | u32 p_cluster, num_clusters; | ||
6889 | u64 new_blkno; | ||
6890 | handle_t *handle; | 6977 | handle_t *handle; |
6891 | struct ocfs2_reflink_xattr_tree_args *args = | 6978 | struct ocfs2_reflink_xattr_tree_args *args = |
6892 | (struct ocfs2_reflink_xattr_tree_args *)para; | 6979 | (struct ocfs2_reflink_xattr_tree_args *)para; |
@@ -6895,6 +6982,9 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode, | |||
6895 | struct ocfs2_alloc_context *data_ac = NULL; | 6982 | struct ocfs2_alloc_context *data_ac = NULL; |
6896 | struct ocfs2_extent_tree et; | 6983 | struct ocfs2_extent_tree et; |
6897 | 6984 | ||
6985 | mlog(0, "reflink xattr buckets %llu len %u\n", | ||
6986 | (unsigned long long)blkno, len); | ||
6987 | |||
6898 | ocfs2_init_xattr_tree_extent_tree(&et, | 6988 | ocfs2_init_xattr_tree_extent_tree(&et, |
6899 | INODE_CACHE(args->reflink->new_inode), | 6989 | INODE_CACHE(args->reflink->new_inode), |
6900 | args->new_blk_bh); | 6990 | args->new_blk_bh); |
@@ -6914,32 +7004,12 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode, | |||
6914 | goto out; | 7004 | goto out; |
6915 | } | 7005 | } |
6916 | 7006 | ||
6917 | ret = ocfs2_claim_clusters(handle, data_ac, | 7007 | ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et, |
6918 | len, &p_cluster, &num_clusters); | 7008 | meta_ac, data_ac, |
6919 | if (ret) { | 7009 | blkno, cpos, len); |
6920 | mlog_errno(ret); | ||
6921 | goto out_commit; | ||
6922 | } | ||
6923 | |||
6924 | new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster); | ||
6925 | |||
6926 | mlog(0, "reflink xattr buckets %llu to %llu, len %u\n", | ||
6927 | (unsigned long long)blkno, (unsigned long long)new_blkno, len); | ||
6928 | ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len, | ||
6929 | meta_ac, data_ac, args); | ||
6930 | if (ret) { | ||
6931 | mlog_errno(ret); | ||
6932 | goto out_commit; | ||
6933 | } | ||
6934 | |||
6935 | mlog(0, "insert new xattr extent rec start %llu len %u to %u\n", | ||
6936 | (unsigned long long)new_blkno, len, cpos); | ||
6937 | ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno, | ||
6938 | len, 0, meta_ac); | ||
6939 | if (ret) | 7010 | if (ret) |
6940 | mlog_errno(ret); | 7011 | mlog_errno(ret); |
6941 | 7012 | ||
6942 | out_commit: | ||
6943 | ocfs2_commit_trans(osb, handle); | 7013 | ocfs2_commit_trans(osb, handle); |
6944 | 7014 | ||
6945 | out: | 7015 | out: |
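The ocfs2_xattr_extend_allocation() hunk replaces a single BUG_ON-guarded allocation with a retry loop: each pass adds as many clusters as the current transaction permits, and when the allocator signals a restart the handle is extended with fresh credits before looping. A compile-and-run sketch of that loop shape, with add_some_clusters() and extend_budget() as hypothetical stand-ins for ocfs2_add_clusters_in_btree() and ocfs2_extend_trans():

#include <stdio.h>

#define RESTART_NONE  0
#define RESTART_TRANS 1

/* Pretend allocator: adds at most 4 clusters per transaction "budget". */
static int add_some_clusters(unsigned int want, unsigned int *added, int *why)
{
	*added = want > 4 ? 4 : want;
	*why = (*added < want) ? RESTART_TRANS : RESTART_NONE;
	return 0;
}

static int extend_budget(unsigned int remaining)
{
	printf("extending transaction for %u more clusters\n", remaining);
	return 0;
}

int main(void)
{
	unsigned int clusters_to_add = 10, added;
	int status = 0, why;

	while (clusters_to_add) {
		status = add_some_clusters(clusters_to_add, &added, &why);
		if (status < 0)
			break;
		clusters_to_add -= added;
		if (why != RESTART_NONE && clusters_to_add) {
			/* same idea as extending the handle's credits above */
			status = extend_budget(clusters_to_add);
			if (status < 0)
				break;
		}
	}
	printf("done, status %d\n", status);
	return status;
}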
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c index 3e73de5967ff..fc8497643fd0 100644 --- a/fs/partitions/ibm.c +++ b/fs/partitions/ibm.c | |||
@@ -74,6 +74,7 @@ int ibm_partition(struct parsed_partitions *state) | |||
74 | } *label; | 74 | } *label; |
75 | unsigned char *data; | 75 | unsigned char *data; |
76 | Sector sect; | 76 | Sector sect; |
77 | sector_t labelsect; | ||
77 | 78 | ||
78 | res = 0; | 79 | res = 0; |
79 | blocksize = bdev_logical_block_size(bdev); | 80 | blocksize = bdev_logical_block_size(bdev); |
@@ -98,10 +99,19 @@ int ibm_partition(struct parsed_partitions *state) | |||
98 | goto out_freeall; | 99 | goto out_freeall; |
99 | 100 | ||
100 | /* | 101 | /* |
102 | * Special case for FBA disks: label sector does not depend on | ||
103 | * blocksize. | ||
104 | */ | ||
105 | if ((info->cu_type == 0x6310 && info->dev_type == 0x9336) || | ||
106 | (info->cu_type == 0x3880 && info->dev_type == 0x3370)) | ||
107 | labelsect = info->label_block; | ||
108 | else | ||
109 | labelsect = info->label_block * (blocksize >> 9); | ||
110 | |||
111 | /* | ||
101 | * Get volume label, extract name and type. | 112 | * Get volume label, extract name and type. |
102 | */ | 113 | */ |
103 | data = read_part_sector(state, info->label_block*(blocksize/512), | 114 | data = read_part_sector(state, labelsect, &sect); |
104 | &sect); | ||
105 | if (data == NULL) | 115 | if (data == NULL) |
106 | goto out_readerr; | 116 | goto out_readerr; |
107 | 117 | ||
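The ibm.c change makes the label sector calculation explicit: FBA devices address the label block directly, while everything else scales the label block by (blocksize >> 9). A small sketch of that computation; only the two FBA cu/dev pairs come from the hunk above, the ECKD-style values in main() are illustrative:

#include <stdio.h>
#include <stdint.h>

static uint64_t label_sector(unsigned int cu_type, unsigned int dev_type,
                             unsigned int blocksize, uint64_t label_block)
{
	/* FBA disks: label sector does not depend on blocksize. */
	if ((cu_type == 0x6310 && dev_type == 0x9336) ||
	    (cu_type == 0x3880 && dev_type == 0x3370))
		return label_block;
	/* Everything else: scale by the number of 512-byte sectors per block. */
	return label_block * (blocksize >> 9);
}

int main(void)
{
	printf("4k blocks, label block 2 -> sector %llu\n",
	       (unsigned long long)label_sector(0x3990, 0x3390, 4096, 2));
	printf("FBA 9336, label block 2 -> sector %llu\n",
	       (unsigned long long)label_sector(0x6310, 0x9336, 512, 2));
	return 0;
}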
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 12c233da1b6b..437d2ca2de97 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
@@ -676,7 +676,7 @@ static void prune_dqcache(int count) | |||
676 | * This is called from kswapd when we think we need some | 676 | * This is called from kswapd when we think we need some |
677 | * more memory | 677 | * more memory |
678 | */ | 678 | */ |
679 | static int shrink_dqcache_memory(int nr, gfp_t gfp_mask) | 679 | static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) |
680 | { | 680 | { |
681 | if (nr) { | 681 | if (nr) { |
682 | spin_lock(&dq_list_lock); | 682 | spin_lock(&dq_list_lock); |
diff --git a/fs/splice.c b/fs/splice.c index 740e6b9faf7a..efdbfece9932 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -1282,7 +1282,8 @@ static int direct_splice_actor(struct pipe_inode_info *pipe, | |||
1282 | { | 1282 | { |
1283 | struct file *file = sd->u.file; | 1283 | struct file *file = sd->u.file; |
1284 | 1284 | ||
1285 | return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags); | 1285 | return do_splice_from(pipe, file, &file->f_pos, sd->total_len, |
1286 | sd->flags); | ||
1286 | } | 1287 | } |
1287 | 1288 | ||
1288 | /** | 1289 | /** |
@@ -1371,8 +1372,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
1371 | if (off_in) | 1372 | if (off_in) |
1372 | return -ESPIPE; | 1373 | return -ESPIPE; |
1373 | if (off_out) { | 1374 | if (off_out) { |
1374 | if (!out->f_op || !out->f_op->llseek || | 1375 | if (!(out->f_mode & FMODE_PWRITE)) |
1375 | out->f_op->llseek == no_llseek) | ||
1376 | return -EINVAL; | 1376 | return -EINVAL; |
1377 | if (copy_from_user(&offset, off_out, sizeof(loff_t))) | 1377 | if (copy_from_user(&offset, off_out, sizeof(loff_t))) |
1378 | return -EFAULT; | 1378 | return -EFAULT; |
@@ -1392,8 +1392,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
1392 | if (off_out) | 1392 | if (off_out) |
1393 | return -ESPIPE; | 1393 | return -ESPIPE; |
1394 | if (off_in) { | 1394 | if (off_in) { |
1395 | if (!in->f_op || !in->f_op->llseek || | 1395 | if (!(in->f_mode & FMODE_PREAD)) |
1396 | in->f_op->llseek == no_llseek) | ||
1397 | return -EINVAL; | 1396 | return -EINVAL; |
1398 | if (copy_from_user(&offset, off_in, sizeof(loff_t))) | 1397 | if (copy_from_user(&offset, off_in, sizeof(loff_t))) |
1399 | return -EFAULT; | 1398 | return -EFAULT; |
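The splice hunks swap the old "does the file have a usable llseek?" test for a direct capability check: offset-based splice needs FMODE_PREAD on the input or FMODE_PWRITE on the output. A tiny sketch of that predicate; the flag values below are placeholders, not the kernel's real FMODE_* constants:

#include <stdio.h>

/* Illustrative bit values only. */
#define FMODE_PREAD   0x1
#define FMODE_PWRITE  0x2

/* Splice with an explicit offset is only legal if the file supports
 * positional reads (input side) or positional writes (output side). */
static int can_splice_with_offset(unsigned int f_mode, int writing)
{
	if (writing)
		return (f_mode & FMODE_PWRITE) != 0;
	return (f_mode & FMODE_PREAD) != 0;
}

int main(void)
{
	printf("read side, pread capable: %d\n",
	       can_splice_with_offset(FMODE_PREAD, 0));
	printf("write side, no pwrite:    %d\n",
	       can_splice_with_offset(FMODE_PREAD, 1));
	return 0;
}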
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index 02feb59cefca..0b201114a5ad 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c | |||
@@ -277,7 +277,7 @@ static int kick_a_thread(void) | |||
277 | return 0; | 277 | return 0; |
278 | } | 278 | } |
279 | 279 | ||
280 | int ubifs_shrinker(int nr, gfp_t gfp_mask) | 280 | int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask) |
281 | { | 281 | { |
282 | int freed, contention = 0; | 282 | int freed, contention = 0; |
283 | long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); | 283 | long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); |
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 2eef553d50c8..04310878f449 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
@@ -1575,7 +1575,7 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot); | |||
1575 | int ubifs_tnc_end_commit(struct ubifs_info *c); | 1575 | int ubifs_tnc_end_commit(struct ubifs_info *c); |
1576 | 1576 | ||
1577 | /* shrinker.c */ | 1577 | /* shrinker.c */ |
1578 | int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask); | 1578 | int ubifs_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask); |
1579 | 1579 | ||
1580 | /* commit.c */ | 1580 | /* commit.c */ |
1581 | int ubifs_bg_thread(void *info); | 1581 | int ubifs_bg_thread(void *info); |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 649ade8ef598..2ee3f7a60163 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -45,7 +45,7 @@ | |||
45 | 45 | ||
46 | static kmem_zone_t *xfs_buf_zone; | 46 | static kmem_zone_t *xfs_buf_zone; |
47 | STATIC int xfsbufd(void *); | 47 | STATIC int xfsbufd(void *); |
48 | STATIC int xfsbufd_wakeup(int, gfp_t); | 48 | STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t); |
49 | STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); | 49 | STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); |
50 | static struct shrinker xfs_buf_shake = { | 50 | static struct shrinker xfs_buf_shake = { |
51 | .shrink = xfsbufd_wakeup, | 51 | .shrink = xfsbufd_wakeup, |
@@ -340,7 +340,7 @@ _xfs_buf_lookup_pages( | |||
340 | __func__, gfp_mask); | 340 | __func__, gfp_mask); |
341 | 341 | ||
342 | XFS_STATS_INC(xb_page_retries); | 342 | XFS_STATS_INC(xb_page_retries); |
343 | xfsbufd_wakeup(0, gfp_mask); | 343 | xfsbufd_wakeup(NULL, 0, gfp_mask); |
344 | congestion_wait(BLK_RW_ASYNC, HZ/50); | 344 | congestion_wait(BLK_RW_ASYNC, HZ/50); |
345 | goto retry; | 345 | goto retry; |
346 | } | 346 | } |
@@ -1762,6 +1762,7 @@ xfs_buf_runall_queues( | |||
1762 | 1762 | ||
1763 | STATIC int | 1763 | STATIC int |
1764 | xfsbufd_wakeup( | 1764 | xfsbufd_wakeup( |
1765 | struct shrinker *shrink, | ||
1765 | int priority, | 1766 | int priority, |
1766 | gfp_t mask) | 1767 | gfp_t mask) |
1767 | { | 1768 | { |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index f2d1718c9165..80938c736c27 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -1883,7 +1883,6 @@ init_xfs_fs(void) | |||
1883 | goto out_cleanup_procfs; | 1883 | goto out_cleanup_procfs; |
1884 | 1884 | ||
1885 | vfs_initquota(); | 1885 | vfs_initquota(); |
1886 | xfs_inode_shrinker_init(); | ||
1887 | 1886 | ||
1888 | error = register_filesystem(&xfs_fs_type); | 1887 | error = register_filesystem(&xfs_fs_type); |
1889 | if (error) | 1888 | if (error) |
@@ -1911,7 +1910,6 @@ exit_xfs_fs(void) | |||
1911 | { | 1910 | { |
1912 | vfs_exitquota(); | 1911 | vfs_exitquota(); |
1913 | unregister_filesystem(&xfs_fs_type); | 1912 | unregister_filesystem(&xfs_fs_type); |
1914 | xfs_inode_shrinker_destroy(); | ||
1915 | xfs_sysctl_unregister(); | 1913 | xfs_sysctl_unregister(); |
1916 | xfs_cleanup_procfs(); | 1914 | xfs_cleanup_procfs(); |
1917 | xfs_buf_terminate(); | 1915 | xfs_buf_terminate(); |
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index ef7f0218bccb..a51a07c3a70c 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -144,6 +144,41 @@ restart: | |||
144 | return last_error; | 144 | return last_error; |
145 | } | 145 | } |
146 | 146 | ||
147 | /* | ||
148 | * Select the next per-ag structure to iterate during the walk. The reclaim | ||
149 | * walk is optimised only to walk AGs with reclaimable inodes in them. | ||
150 | */ | ||
151 | static struct xfs_perag * | ||
152 | xfs_inode_ag_iter_next_pag( | ||
153 | struct xfs_mount *mp, | ||
154 | xfs_agnumber_t *first, | ||
155 | int tag) | ||
156 | { | ||
157 | struct xfs_perag *pag = NULL; | ||
158 | |||
159 | if (tag == XFS_ICI_RECLAIM_TAG) { | ||
160 | int found; | ||
161 | int ref; | ||
162 | |||
163 | spin_lock(&mp->m_perag_lock); | ||
164 | found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, | ||
165 | (void **)&pag, *first, 1, tag); | ||
166 | if (found <= 0) { | ||
167 | spin_unlock(&mp->m_perag_lock); | ||
168 | return NULL; | ||
169 | } | ||
170 | *first = pag->pag_agno + 1; | ||
171 | /* open coded pag reference increment */ | ||
172 | ref = atomic_inc_return(&pag->pag_ref); | ||
173 | spin_unlock(&mp->m_perag_lock); | ||
174 | trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_); | ||
175 | } else { | ||
176 | pag = xfs_perag_get(mp, *first); | ||
177 | (*first)++; | ||
178 | } | ||
179 | return pag; | ||
180 | } | ||
181 | |||
147 | int | 182 | int |
148 | xfs_inode_ag_iterator( | 183 | xfs_inode_ag_iterator( |
149 | struct xfs_mount *mp, | 184 | struct xfs_mount *mp, |
@@ -154,16 +189,15 @@ xfs_inode_ag_iterator( | |||
154 | int exclusive, | 189 | int exclusive, |
155 | int *nr_to_scan) | 190 | int *nr_to_scan) |
156 | { | 191 | { |
192 | struct xfs_perag *pag; | ||
157 | int error = 0; | 193 | int error = 0; |
158 | int last_error = 0; | 194 | int last_error = 0; |
159 | xfs_agnumber_t ag; | 195 | xfs_agnumber_t ag; |
160 | int nr; | 196 | int nr; |
161 | 197 | ||
162 | nr = nr_to_scan ? *nr_to_scan : INT_MAX; | 198 | nr = nr_to_scan ? *nr_to_scan : INT_MAX; |
163 | for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { | 199 | ag = 0; |
164 | struct xfs_perag *pag; | 200 | while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) { |
165 | |||
166 | pag = xfs_perag_get(mp, ag); | ||
167 | error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, | 201 | error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, |
168 | exclusive, &nr); | 202 | exclusive, &nr); |
169 | xfs_perag_put(pag); | 203 | xfs_perag_put(pag); |
@@ -640,6 +674,17 @@ __xfs_inode_set_reclaim_tag( | |||
640 | radix_tree_tag_set(&pag->pag_ici_root, | 674 | radix_tree_tag_set(&pag->pag_ici_root, |
641 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), | 675 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), |
642 | XFS_ICI_RECLAIM_TAG); | 676 | XFS_ICI_RECLAIM_TAG); |
677 | |||
678 | if (!pag->pag_ici_reclaimable) { | ||
679 | /* propagate the reclaim tag up into the perag radix tree */ | ||
680 | spin_lock(&ip->i_mount->m_perag_lock); | ||
681 | radix_tree_tag_set(&ip->i_mount->m_perag_tree, | ||
682 | XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), | ||
683 | XFS_ICI_RECLAIM_TAG); | ||
684 | spin_unlock(&ip->i_mount->m_perag_lock); | ||
685 | trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno, | ||
686 | -1, _RET_IP_); | ||
687 | } | ||
643 | pag->pag_ici_reclaimable++; | 688 | pag->pag_ici_reclaimable++; |
644 | } | 689 | } |
645 | 690 | ||
@@ -674,6 +719,16 @@ __xfs_inode_clear_reclaim_tag( | |||
674 | radix_tree_tag_clear(&pag->pag_ici_root, | 719 | radix_tree_tag_clear(&pag->pag_ici_root, |
675 | XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); | 720 | XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); |
676 | pag->pag_ici_reclaimable--; | 721 | pag->pag_ici_reclaimable--; |
722 | if (!pag->pag_ici_reclaimable) { | ||
723 | /* clear the reclaim tag from the perag radix tree */ | ||
724 | spin_lock(&ip->i_mount->m_perag_lock); | ||
725 | radix_tree_tag_clear(&ip->i_mount->m_perag_tree, | ||
726 | XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), | ||
727 | XFS_ICI_RECLAIM_TAG); | ||
728 | spin_unlock(&ip->i_mount->m_perag_lock); | ||
729 | trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno, | ||
730 | -1, _RET_IP_); | ||
731 | } | ||
677 | } | 732 | } |
678 | 733 | ||
679 | /* | 734 | /* |
@@ -828,83 +883,52 @@ xfs_reclaim_inodes( | |||
828 | 883 | ||
829 | /* | 884 | /* |
830 | * Shrinker infrastructure. | 885 | * Shrinker infrastructure. |
831 | * | ||
832 | * This is all far more complex than it needs to be. It adds a global list of | ||
833 | * mounts because the shrinkers can only call a global context. We need to make | ||
834 | * the shrinkers pass a context to avoid the need for global state. | ||
835 | */ | 886 | */ |
836 | static LIST_HEAD(xfs_mount_list); | ||
837 | static struct rw_semaphore xfs_mount_list_lock; | ||
838 | |||
839 | static int | 887 | static int |
840 | xfs_reclaim_inode_shrink( | 888 | xfs_reclaim_inode_shrink( |
889 | struct shrinker *shrink, | ||
841 | int nr_to_scan, | 890 | int nr_to_scan, |
842 | gfp_t gfp_mask) | 891 | gfp_t gfp_mask) |
843 | { | 892 | { |
844 | struct xfs_mount *mp; | 893 | struct xfs_mount *mp; |
845 | struct xfs_perag *pag; | 894 | struct xfs_perag *pag; |
846 | xfs_agnumber_t ag; | 895 | xfs_agnumber_t ag; |
847 | int reclaimable = 0; | 896 | int reclaimable; |
848 | 897 | ||
898 | mp = container_of(shrink, struct xfs_mount, m_inode_shrink); | ||
849 | if (nr_to_scan) { | 899 | if (nr_to_scan) { |
850 | if (!(gfp_mask & __GFP_FS)) | 900 | if (!(gfp_mask & __GFP_FS)) |
851 | return -1; | 901 | return -1; |
852 | 902 | ||
853 | down_read(&xfs_mount_list_lock); | 903 | xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0, |
854 | list_for_each_entry(mp, &xfs_mount_list, m_mplist) { | ||
855 | xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0, | ||
856 | XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan); | 904 | XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan); |
857 | if (nr_to_scan <= 0) | 905 | /* if we don't exhaust the scan, don't bother coming back */ |
858 | break; | 906 | if (nr_to_scan > 0) |
859 | } | 907 | return -1; |
860 | up_read(&xfs_mount_list_lock); | 908 | } |
861 | } | ||
862 | 909 | ||
863 | down_read(&xfs_mount_list_lock); | 910 | reclaimable = 0; |
864 | list_for_each_entry(mp, &xfs_mount_list, m_mplist) { | 911 | ag = 0; |
865 | for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { | 912 | while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, |
866 | pag = xfs_perag_get(mp, ag); | 913 | XFS_ICI_RECLAIM_TAG))) { |
867 | reclaimable += pag->pag_ici_reclaimable; | 914 | reclaimable += pag->pag_ici_reclaimable; |
868 | xfs_perag_put(pag); | 915 | xfs_perag_put(pag); |
869 | } | ||
870 | } | 916 | } |
871 | up_read(&xfs_mount_list_lock); | ||
872 | return reclaimable; | 917 | return reclaimable; |
873 | } | 918 | } |
874 | 919 | ||
875 | static struct shrinker xfs_inode_shrinker = { | ||
876 | .shrink = xfs_reclaim_inode_shrink, | ||
877 | .seeks = DEFAULT_SEEKS, | ||
878 | }; | ||
879 | |||
880 | void __init | ||
881 | xfs_inode_shrinker_init(void) | ||
882 | { | ||
883 | init_rwsem(&xfs_mount_list_lock); | ||
884 | register_shrinker(&xfs_inode_shrinker); | ||
885 | } | ||
886 | |||
887 | void | ||
888 | xfs_inode_shrinker_destroy(void) | ||
889 | { | ||
890 | ASSERT(list_empty(&xfs_mount_list)); | ||
891 | unregister_shrinker(&xfs_inode_shrinker); | ||
892 | } | ||
893 | |||
894 | void | 920 | void |
895 | xfs_inode_shrinker_register( | 921 | xfs_inode_shrinker_register( |
896 | struct xfs_mount *mp) | 922 | struct xfs_mount *mp) |
897 | { | 923 | { |
898 | down_write(&xfs_mount_list_lock); | 924 | mp->m_inode_shrink.shrink = xfs_reclaim_inode_shrink; |
899 | list_add_tail(&mp->m_mplist, &xfs_mount_list); | 925 | mp->m_inode_shrink.seeks = DEFAULT_SEEKS; |
900 | up_write(&xfs_mount_list_lock); | 926 | register_shrinker(&mp->m_inode_shrink); |
901 | } | 927 | } |
902 | 928 | ||
903 | void | 929 | void |
904 | xfs_inode_shrinker_unregister( | 930 | xfs_inode_shrinker_unregister( |
905 | struct xfs_mount *mp) | 931 | struct xfs_mount *mp) |
906 | { | 932 | { |
907 | down_write(&xfs_mount_list_lock); | 933 | unregister_shrinker(&mp->m_inode_shrink); |
908 | list_del(&mp->m_mplist); | ||
909 | up_write(&xfs_mount_list_lock); | ||
910 | } | 934 | } |
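The xfs_sync.c rework drops the global mount list entirely: the shrinker callback now receives its struct shrinker pointer and recovers the owning mount via container_of() on the embedded m_inode_shrink. A userspace model of that embed-and-recover pattern, with deliberately simplified structures that are not the kernel definitions:

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct shrinker {
	int (*shrink)(struct shrinker *s, int nr_to_scan);
};

struct mount {
	const char *name;
	int reclaimable;
	struct shrinker inode_shrink;   /* embedded, like m_inode_shrink */
};

static int mount_shrink(struct shrinker *s, int nr_to_scan)
{
	/* Recover the per-mount state instead of walking a global list. */
	struct mount *mp = container_of(s, struct mount, inode_shrink);

	if (nr_to_scan)
		printf("scanning %d inodes on %s\n", nr_to_scan, mp->name);
	return mp->reclaimable;
}

int main(void)
{
	struct mount m = { .name = "/data", .reclaimable = 42 };

	/* analogue of xfs_inode_shrinker_register() */
	m.inode_shrink.shrink = mount_shrink;
	printf("reclaimable: %d\n", m.inode_shrink.shrink(&m.inode_shrink, 8));
	return 0;
}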
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index cdcbaaca9880..e28139aaa4aa 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h | |||
@@ -55,8 +55,6 @@ int xfs_inode_ag_iterator(struct xfs_mount *mp, | |||
55 | int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), | 55 | int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), |
56 | int flags, int tag, int write_lock, int *nr_to_scan); | 56 | int flags, int tag, int write_lock, int *nr_to_scan); |
57 | 57 | ||
58 | void xfs_inode_shrinker_init(void); | ||
59 | void xfs_inode_shrinker_destroy(void); | ||
60 | void xfs_inode_shrinker_register(struct xfs_mount *mp); | 58 | void xfs_inode_shrinker_register(struct xfs_mount *mp); |
61 | void xfs_inode_shrinker_unregister(struct xfs_mount *mp); | 59 | void xfs_inode_shrinker_unregister(struct xfs_mount *mp); |
62 | 60 | ||
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index 73d5aa117384..302820690904 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h | |||
@@ -124,7 +124,10 @@ DEFINE_EVENT(xfs_perag_class, name, \ | |||
124 | unsigned long caller_ip), \ | 124 | unsigned long caller_ip), \ |
125 | TP_ARGS(mp, agno, refcount, caller_ip)) | 125 | TP_ARGS(mp, agno, refcount, caller_ip)) |
126 | DEFINE_PERAG_REF_EVENT(xfs_perag_get); | 126 | DEFINE_PERAG_REF_EVENT(xfs_perag_get); |
127 | DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim); | ||
127 | DEFINE_PERAG_REF_EVENT(xfs_perag_put); | 128 | DEFINE_PERAG_REF_EVENT(xfs_perag_put); |
129 | DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim); | ||
130 | DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim); | ||
128 | 131 | ||
129 | TRACE_EVENT(xfs_attr_list_node_descend, | 132 | TRACE_EVENT(xfs_attr_list_node_descend, |
130 | TP_PROTO(struct xfs_attr_list_context *ctx, | 133 | TP_PROTO(struct xfs_attr_list_context *ctx, |
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 8c117ff2e3ab..67c018392d62 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c | |||
@@ -69,7 +69,7 @@ STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); | |||
69 | 69 | ||
70 | STATIC int xfs_qm_init_quotainos(xfs_mount_t *); | 70 | STATIC int xfs_qm_init_quotainos(xfs_mount_t *); |
71 | STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); | 71 | STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); |
72 | STATIC int xfs_qm_shake(int, gfp_t); | 72 | STATIC int xfs_qm_shake(struct shrinker *, int, gfp_t); |
73 | 73 | ||
74 | static struct shrinker xfs_qm_shaker = { | 74 | static struct shrinker xfs_qm_shaker = { |
75 | .shrink = xfs_qm_shake, | 75 | .shrink = xfs_qm_shake, |
@@ -2117,7 +2117,10 @@ xfs_qm_shake_freelist( | |||
2117 | */ | 2117 | */ |
2118 | /* ARGSUSED */ | 2118 | /* ARGSUSED */ |
2119 | STATIC int | 2119 | STATIC int |
2120 | xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask) | 2120 | xfs_qm_shake( |
2121 | struct shrinker *shrink, | ||
2122 | int nr_to_scan, | ||
2123 | gfp_t gfp_mask) | ||
2121 | { | 2124 | { |
2122 | int ndqused, nfree, n; | 2125 | int ndqused, nfree, n; |
2123 | 2126 | ||
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 1d2c7eed4eda..5761087ee8ea 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -259,7 +259,7 @@ typedef struct xfs_mount { | |||
259 | wait_queue_head_t m_wait_single_sync_task; | 259 | wait_queue_head_t m_wait_single_sync_task; |
260 | __int64_t m_update_flags; /* sb flags we need to update | 260 | __int64_t m_update_flags; /* sb flags we need to update |
261 | on the next remount,rw */ | 261 | on the next remount,rw */ |
262 | struct list_head m_mplist; /* inode shrinker mount list */ | 262 | struct shrinker m_inode_shrink; /* inode reclaim shrinker */ |
263 | } xfs_mount_t; | 263 | } xfs_mount_t; |
264 | 264 | ||
265 | /* | 265 | /* |