diff options
Diffstat (limited to 'fs')
59 files changed, 1056 insertions, 278 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c index aea605c98ba6..aae187a7f94a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
| @@ -551,6 +551,7 @@ struct block_device *bdgrab(struct block_device *bdev) | |||
| 551 | ihold(bdev->bd_inode); | 551 | ihold(bdev->bd_inode); |
| 552 | return bdev; | 552 | return bdev; |
| 553 | } | 553 | } |
| 554 | EXPORT_SYMBOL(bdgrab); | ||
| 554 | 555 | ||
| 555 | long nr_blockdev_pages(void) | 556 | long nr_blockdev_pages(void) |
| 556 | { | 557 | { |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ecd25a1b4e51..ca9d8f1a3bb6 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
| @@ -651,6 +651,8 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, | |||
| 651 | if (tree_mod_dont_log(fs_info, NULL)) | 651 | if (tree_mod_dont_log(fs_info, NULL)) |
| 652 | return 0; | 652 | return 0; |
| 653 | 653 | ||
| 654 | __tree_mod_log_free_eb(fs_info, old_root); | ||
| 655 | |||
| 654 | ret = tree_mod_alloc(fs_info, flags, &tm); | 656 | ret = tree_mod_alloc(fs_info, flags, &tm); |
| 655 | if (ret < 0) | 657 | if (ret < 0) |
| 656 | goto out; | 658 | goto out; |
| @@ -736,7 +738,7 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq) | |||
| 736 | static noinline void | 738 | static noinline void |
| 737 | tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, | 739 | tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, |
| 738 | struct extent_buffer *src, unsigned long dst_offset, | 740 | struct extent_buffer *src, unsigned long dst_offset, |
| 739 | unsigned long src_offset, int nr_items) | 741 | unsigned long src_offset, int nr_items, int log_removal) |
| 740 | { | 742 | { |
| 741 | int ret; | 743 | int ret; |
| 742 | int i; | 744 | int i; |
| @@ -750,10 +752,12 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, | |||
| 750 | } | 752 | } |
| 751 | 753 | ||
| 752 | for (i = 0; i < nr_items; i++) { | 754 | for (i = 0; i < nr_items; i++) { |
| 753 | ret = tree_mod_log_insert_key_locked(fs_info, src, | 755 | if (log_removal) { |
| 754 | i + src_offset, | 756 | ret = tree_mod_log_insert_key_locked(fs_info, src, |
| 755 | MOD_LOG_KEY_REMOVE); | 757 | i + src_offset, |
| 756 | BUG_ON(ret < 0); | 758 | MOD_LOG_KEY_REMOVE); |
| 759 | BUG_ON(ret < 0); | ||
| 760 | } | ||
| 757 | ret = tree_mod_log_insert_key_locked(fs_info, dst, | 761 | ret = tree_mod_log_insert_key_locked(fs_info, dst, |
| 758 | i + dst_offset, | 762 | i + dst_offset, |
| 759 | MOD_LOG_KEY_ADD); | 763 | MOD_LOG_KEY_ADD); |
| @@ -927,7 +931,6 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, | |||
| 927 | ret = btrfs_dec_ref(trans, root, buf, 1, 1); | 931 | ret = btrfs_dec_ref(trans, root, buf, 1, 1); |
| 928 | BUG_ON(ret); /* -ENOMEM */ | 932 | BUG_ON(ret); /* -ENOMEM */ |
| 929 | } | 933 | } |
| 930 | tree_mod_log_free_eb(root->fs_info, buf); | ||
| 931 | clean_tree_block(trans, root, buf); | 934 | clean_tree_block(trans, root, buf); |
| 932 | *last_ref = 1; | 935 | *last_ref = 1; |
| 933 | } | 936 | } |
| @@ -1046,6 +1049,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
| 1046 | btrfs_set_node_ptr_generation(parent, parent_slot, | 1049 | btrfs_set_node_ptr_generation(parent, parent_slot, |
| 1047 | trans->transid); | 1050 | trans->transid); |
| 1048 | btrfs_mark_buffer_dirty(parent); | 1051 | btrfs_mark_buffer_dirty(parent); |
| 1052 | tree_mod_log_free_eb(root->fs_info, buf); | ||
| 1049 | btrfs_free_tree_block(trans, root, buf, parent_start, | 1053 | btrfs_free_tree_block(trans, root, buf, parent_start, |
| 1050 | last_ref); | 1054 | last_ref); |
| 1051 | } | 1055 | } |
| @@ -1750,7 +1754,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
| 1750 | goto enospc; | 1754 | goto enospc; |
| 1751 | } | 1755 | } |
| 1752 | 1756 | ||
| 1753 | tree_mod_log_free_eb(root->fs_info, root->node); | ||
| 1754 | tree_mod_log_set_root_pointer(root, child); | 1757 | tree_mod_log_set_root_pointer(root, child); |
| 1755 | rcu_assign_pointer(root->node, child); | 1758 | rcu_assign_pointer(root->node, child); |
| 1756 | 1759 | ||
| @@ -2995,7 +2998,7 @@ static int push_node_left(struct btrfs_trans_handle *trans, | |||
| 2995 | push_items = min(src_nritems - 8, push_items); | 2998 | push_items = min(src_nritems - 8, push_items); |
| 2996 | 2999 | ||
| 2997 | tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0, | 3000 | tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0, |
| 2998 | push_items); | 3001 | push_items, 1); |
| 2999 | copy_extent_buffer(dst, src, | 3002 | copy_extent_buffer(dst, src, |
| 3000 | btrfs_node_key_ptr_offset(dst_nritems), | 3003 | btrfs_node_key_ptr_offset(dst_nritems), |
| 3001 | btrfs_node_key_ptr_offset(0), | 3004 | btrfs_node_key_ptr_offset(0), |
| @@ -3066,7 +3069,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans, | |||
| 3066 | sizeof(struct btrfs_key_ptr)); | 3069 | sizeof(struct btrfs_key_ptr)); |
| 3067 | 3070 | ||
| 3068 | tree_mod_log_eb_copy(root->fs_info, dst, src, 0, | 3071 | tree_mod_log_eb_copy(root->fs_info, dst, src, 0, |
| 3069 | src_nritems - push_items, push_items); | 3072 | src_nritems - push_items, push_items, 1); |
| 3070 | copy_extent_buffer(dst, src, | 3073 | copy_extent_buffer(dst, src, |
| 3071 | btrfs_node_key_ptr_offset(0), | 3074 | btrfs_node_key_ptr_offset(0), |
| 3072 | btrfs_node_key_ptr_offset(src_nritems - push_items), | 3075 | btrfs_node_key_ptr_offset(src_nritems - push_items), |
| @@ -3218,12 +3221,18 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
| 3218 | int mid; | 3221 | int mid; |
| 3219 | int ret; | 3222 | int ret; |
| 3220 | u32 c_nritems; | 3223 | u32 c_nritems; |
| 3224 | int tree_mod_log_removal = 1; | ||
| 3221 | 3225 | ||
| 3222 | c = path->nodes[level]; | 3226 | c = path->nodes[level]; |
| 3223 | WARN_ON(btrfs_header_generation(c) != trans->transid); | 3227 | WARN_ON(btrfs_header_generation(c) != trans->transid); |
| 3224 | if (c == root->node) { | 3228 | if (c == root->node) { |
| 3225 | /* trying to split the root, lets make a new one */ | 3229 | /* trying to split the root, lets make a new one */ |
| 3226 | ret = insert_new_root(trans, root, path, level + 1); | 3230 | ret = insert_new_root(trans, root, path, level + 1); |
| 3231 | /* | ||
| 3232 | * removal of root nodes has been logged by | ||
| 3233 | * tree_mod_log_set_root_pointer due to locking | ||
| 3234 | */ | ||
| 3235 | tree_mod_log_removal = 0; | ||
| 3227 | if (ret) | 3236 | if (ret) |
| 3228 | return ret; | 3237 | return ret; |
| 3229 | } else { | 3238 | } else { |
| @@ -3261,7 +3270,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
| 3261 | (unsigned long)btrfs_header_chunk_tree_uuid(split), | 3270 | (unsigned long)btrfs_header_chunk_tree_uuid(split), |
| 3262 | BTRFS_UUID_SIZE); | 3271 | BTRFS_UUID_SIZE); |
| 3263 | 3272 | ||
| 3264 | tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid); | 3273 | tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid, |
| 3274 | tree_mod_log_removal); | ||
| 3265 | copy_extent_buffer(split, c, | 3275 | copy_extent_buffer(split, c, |
| 3266 | btrfs_node_key_ptr_offset(0), | 3276 | btrfs_node_key_ptr_offset(0), |
| 3267 | btrfs_node_key_ptr_offset(mid), | 3277 | btrfs_node_key_ptr_offset(mid), |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7d84651e850b..6d19a0a554aa 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -1291,6 +1291,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, | |||
| 1291 | 0, objectid, NULL, 0, 0, 0); | 1291 | 0, objectid, NULL, 0, 0, 0); |
| 1292 | if (IS_ERR(leaf)) { | 1292 | if (IS_ERR(leaf)) { |
| 1293 | ret = PTR_ERR(leaf); | 1293 | ret = PTR_ERR(leaf); |
| 1294 | leaf = NULL; | ||
| 1294 | goto fail; | 1295 | goto fail; |
| 1295 | } | 1296 | } |
| 1296 | 1297 | ||
| @@ -1334,11 +1335,16 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, | |||
| 1334 | 1335 | ||
| 1335 | btrfs_tree_unlock(leaf); | 1336 | btrfs_tree_unlock(leaf); |
| 1336 | 1337 | ||
| 1338 | return root; | ||
| 1339 | |||
| 1337 | fail: | 1340 | fail: |
| 1338 | if (ret) | 1341 | if (leaf) { |
| 1339 | return ERR_PTR(ret); | 1342 | btrfs_tree_unlock(leaf); |
| 1343 | free_extent_buffer(leaf); | ||
| 1344 | } | ||
| 1345 | kfree(root); | ||
| 1340 | 1346 | ||
| 1341 | return root; | 1347 | return ERR_PTR(ret); |
| 1342 | } | 1348 | } |
| 1343 | 1349 | ||
| 1344 | static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, | 1350 | static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, |
| @@ -3253,7 +3259,7 @@ void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) | |||
| 3253 | if (btrfs_root_refs(&root->root_item) == 0) | 3259 | if (btrfs_root_refs(&root->root_item) == 0) |
| 3254 | synchronize_srcu(&fs_info->subvol_srcu); | 3260 | synchronize_srcu(&fs_info->subvol_srcu); |
| 3255 | 3261 | ||
| 3256 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | 3262 | if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { |
| 3257 | btrfs_free_log(NULL, root); | 3263 | btrfs_free_log(NULL, root); |
| 3258 | btrfs_free_log_root_tree(NULL, fs_info); | 3264 | btrfs_free_log_root_tree(NULL, fs_info); |
| 3259 | } | 3265 | } |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9ac2eca681eb..3d551231caba 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -257,7 +257,8 @@ static int exclude_super_stripes(struct btrfs_root *root, | |||
| 257 | cache->bytes_super += stripe_len; | 257 | cache->bytes_super += stripe_len; |
| 258 | ret = add_excluded_extent(root, cache->key.objectid, | 258 | ret = add_excluded_extent(root, cache->key.objectid, |
| 259 | stripe_len); | 259 | stripe_len); |
| 260 | BUG_ON(ret); /* -ENOMEM */ | 260 | if (ret) |
| 261 | return ret; | ||
| 261 | } | 262 | } |
| 262 | 263 | ||
| 263 | for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { | 264 | for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { |
| @@ -265,13 +266,17 @@ static int exclude_super_stripes(struct btrfs_root *root, | |||
| 265 | ret = btrfs_rmap_block(&root->fs_info->mapping_tree, | 266 | ret = btrfs_rmap_block(&root->fs_info->mapping_tree, |
| 266 | cache->key.objectid, bytenr, | 267 | cache->key.objectid, bytenr, |
| 267 | 0, &logical, &nr, &stripe_len); | 268 | 0, &logical, &nr, &stripe_len); |
| 268 | BUG_ON(ret); /* -ENOMEM */ | 269 | if (ret) |
| 270 | return ret; | ||
| 269 | 271 | ||
| 270 | while (nr--) { | 272 | while (nr--) { |
| 271 | cache->bytes_super += stripe_len; | 273 | cache->bytes_super += stripe_len; |
| 272 | ret = add_excluded_extent(root, logical[nr], | 274 | ret = add_excluded_extent(root, logical[nr], |
| 273 | stripe_len); | 275 | stripe_len); |
| 274 | BUG_ON(ret); /* -ENOMEM */ | 276 | if (ret) { |
| 277 | kfree(logical); | ||
| 278 | return ret; | ||
| 279 | } | ||
| 275 | } | 280 | } |
| 276 | 281 | ||
| 277 | kfree(logical); | 282 | kfree(logical); |
| @@ -4438,7 +4443,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
| 4438 | spin_lock(&sinfo->lock); | 4443 | spin_lock(&sinfo->lock); |
| 4439 | spin_lock(&block_rsv->lock); | 4444 | spin_lock(&block_rsv->lock); |
| 4440 | 4445 | ||
| 4441 | block_rsv->size = num_bytes; | 4446 | block_rsv->size = min_t(u64, num_bytes, 512 * 1024 * 1024); |
| 4442 | 4447 | ||
| 4443 | num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + | 4448 | num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + |
| 4444 | sinfo->bytes_reserved + sinfo->bytes_readonly + | 4449 | sinfo->bytes_reserved + sinfo->bytes_readonly + |
| @@ -4793,14 +4798,49 @@ out_fail: | |||
| 4793 | * If the inodes csum_bytes is the same as the original | 4798 | * If the inodes csum_bytes is the same as the original |
| 4794 | * csum_bytes then we know we haven't raced with any free()ers | 4799 | * csum_bytes then we know we haven't raced with any free()ers |
| 4795 | * so we can just reduce our inodes csum bytes and carry on. | 4800 | * so we can just reduce our inodes csum bytes and carry on. |
| 4796 | * Otherwise we have to do the normal free thing to account for | ||
| 4797 | * the case that the free side didn't free up its reserve | ||
| 4798 | * because of this outstanding reservation. | ||
| 4799 | */ | 4801 | */ |
| 4800 | if (BTRFS_I(inode)->csum_bytes == csum_bytes) | 4802 | if (BTRFS_I(inode)->csum_bytes == csum_bytes) { |
| 4801 | calc_csum_metadata_size(inode, num_bytes, 0); | 4803 | calc_csum_metadata_size(inode, num_bytes, 0); |
| 4802 | else | 4804 | } else { |
| 4803 | to_free = calc_csum_metadata_size(inode, num_bytes, 0); | 4805 | u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes; |
| 4806 | u64 bytes; | ||
| 4807 | |||
| 4808 | /* | ||
| 4809 | * This is tricky, but first we need to figure out how much we | ||
| 4810 | * free'd from any free-ers that occured during this | ||
| 4811 | * reservation, so we reset ->csum_bytes to the csum_bytes | ||
| 4812 | * before we dropped our lock, and then call the free for the | ||
| 4813 | * number of bytes that were freed while we were trying our | ||
| 4814 | * reservation. | ||
| 4815 | */ | ||
| 4816 | bytes = csum_bytes - BTRFS_I(inode)->csum_bytes; | ||
| 4817 | BTRFS_I(inode)->csum_bytes = csum_bytes; | ||
| 4818 | to_free = calc_csum_metadata_size(inode, bytes, 0); | ||
| 4819 | |||
| 4820 | |||
| 4821 | /* | ||
| 4822 | * Now we need to see how much we would have freed had we not | ||
| 4823 | * been making this reservation and our ->csum_bytes were not | ||
| 4824 | * artificially inflated. | ||
| 4825 | */ | ||
| 4826 | BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes; | ||
| 4827 | bytes = csum_bytes - orig_csum_bytes; | ||
| 4828 | bytes = calc_csum_metadata_size(inode, bytes, 0); | ||
| 4829 | |||
| 4830 | /* | ||
| 4831 | * Now reset ->csum_bytes to what it should be. If bytes is | ||
| 4832 | * more than to_free then we would have free'd more space had we | ||
| 4833 | * not had an artificially high ->csum_bytes, so we need to free | ||
| 4834 | * the remainder. If bytes is the same or less then we don't | ||
| 4835 | * need to do anything, the other free-ers did the correct | ||
| 4836 | * thing. | ||
| 4837 | */ | ||
| 4838 | BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes; | ||
| 4839 | if (bytes > to_free) | ||
| 4840 | to_free = bytes - to_free; | ||
| 4841 | else | ||
| 4842 | to_free = 0; | ||
| 4843 | } | ||
| 4804 | spin_unlock(&BTRFS_I(inode)->lock); | 4844 | spin_unlock(&BTRFS_I(inode)->lock); |
| 4805 | if (dropped) | 4845 | if (dropped) |
| 4806 | to_free += btrfs_calc_trans_metadata_size(root, dropped); | 4846 | to_free += btrfs_calc_trans_metadata_size(root, dropped); |
| @@ -7947,7 +7987,17 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
| 7947 | * info has super bytes accounted for, otherwise we'll think | 7987 | * info has super bytes accounted for, otherwise we'll think |
| 7948 | * we have more space than we actually do. | 7988 | * we have more space than we actually do. |
| 7949 | */ | 7989 | */ |
| 7950 | exclude_super_stripes(root, cache); | 7990 | ret = exclude_super_stripes(root, cache); |
| 7991 | if (ret) { | ||
| 7992 | /* | ||
| 7993 | * We may have excluded something, so call this just in | ||
| 7994 | * case. | ||
| 7995 | */ | ||
| 7996 | free_excluded_extents(root, cache); | ||
| 7997 | kfree(cache->free_space_ctl); | ||
| 7998 | kfree(cache); | ||
| 7999 | goto error; | ||
| 8000 | } | ||
| 7951 | 8001 | ||
| 7952 | /* | 8002 | /* |
| 7953 | * check for two cases, either we are full, and therefore | 8003 | * check for two cases, either we are full, and therefore |
| @@ -8089,7 +8139,17 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
| 8089 | 8139 | ||
| 8090 | cache->last_byte_to_unpin = (u64)-1; | 8140 | cache->last_byte_to_unpin = (u64)-1; |
| 8091 | cache->cached = BTRFS_CACHE_FINISHED; | 8141 | cache->cached = BTRFS_CACHE_FINISHED; |
| 8092 | exclude_super_stripes(root, cache); | 8142 | ret = exclude_super_stripes(root, cache); |
| 8143 | if (ret) { | ||
| 8144 | /* | ||
| 8145 | * We may have excluded something, so call this just in | ||
| 8146 | * case. | ||
| 8147 | */ | ||
| 8148 | free_excluded_extents(root, cache); | ||
| 8149 | kfree(cache->free_space_ctl); | ||
| 8150 | kfree(cache); | ||
| 8151 | return ret; | ||
| 8152 | } | ||
| 8093 | 8153 | ||
| 8094 | add_new_free_space(cache, root->fs_info, chunk_offset, | 8154 | add_new_free_space(cache, root->fs_info, chunk_offset, |
| 8095 | chunk_offset + size); | 8155 | chunk_offset + size); |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f173c5af6461..cdee391fc7bf 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
| @@ -1257,6 +1257,39 @@ int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end) | |||
| 1257 | GFP_NOFS); | 1257 | GFP_NOFS); |
| 1258 | } | 1258 | } |
| 1259 | 1259 | ||
| 1260 | int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end) | ||
| 1261 | { | ||
| 1262 | unsigned long index = start >> PAGE_CACHE_SHIFT; | ||
| 1263 | unsigned long end_index = end >> PAGE_CACHE_SHIFT; | ||
| 1264 | struct page *page; | ||
| 1265 | |||
| 1266 | while (index <= end_index) { | ||
| 1267 | page = find_get_page(inode->i_mapping, index); | ||
| 1268 | BUG_ON(!page); /* Pages should be in the extent_io_tree */ | ||
| 1269 | clear_page_dirty_for_io(page); | ||
| 1270 | page_cache_release(page); | ||
| 1271 | index++; | ||
| 1272 | } | ||
| 1273 | return 0; | ||
| 1274 | } | ||
| 1275 | |||
| 1276 | int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end) | ||
| 1277 | { | ||
| 1278 | unsigned long index = start >> PAGE_CACHE_SHIFT; | ||
| 1279 | unsigned long end_index = end >> PAGE_CACHE_SHIFT; | ||
| 1280 | struct page *page; | ||
| 1281 | |||
| 1282 | while (index <= end_index) { | ||
| 1283 | page = find_get_page(inode->i_mapping, index); | ||
| 1284 | BUG_ON(!page); /* Pages should be in the extent_io_tree */ | ||
| 1285 | account_page_redirty(page); | ||
| 1286 | __set_page_dirty_nobuffers(page); | ||
| 1287 | page_cache_release(page); | ||
| 1288 | index++; | ||
| 1289 | } | ||
| 1290 | return 0; | ||
| 1291 | } | ||
| 1292 | |||
| 1260 | /* | 1293 | /* |
| 1261 | * helper function to set both pages and extents in the tree writeback | 1294 | * helper function to set both pages and extents in the tree writeback |
| 1262 | */ | 1295 | */ |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 6068a1985560..258c92156857 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
| @@ -325,6 +325,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, | |||
| 325 | unsigned long *map_len); | 325 | unsigned long *map_len); |
| 326 | int extent_range_uptodate(struct extent_io_tree *tree, | 326 | int extent_range_uptodate(struct extent_io_tree *tree, |
| 327 | u64 start, u64 end); | 327 | u64 start, u64 end); |
| 328 | int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); | ||
| 329 | int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); | ||
| 328 | int extent_clear_unlock_delalloc(struct inode *inode, | 330 | int extent_clear_unlock_delalloc(struct inode *inode, |
| 329 | struct extent_io_tree *tree, | 331 | struct extent_io_tree *tree, |
| 330 | u64 start, u64 end, struct page *locked_page, | 332 | u64 start, u64 end, struct page *locked_page, |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index ec160202be3e..c4628a201cb3 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
| @@ -118,9 +118,11 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, | |||
| 118 | csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]); | 118 | csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]); |
| 119 | csums_in_item /= csum_size; | 119 | csums_in_item /= csum_size; |
| 120 | 120 | ||
| 121 | if (csum_offset >= csums_in_item) { | 121 | if (csum_offset == csums_in_item) { |
| 122 | ret = -EFBIG; | 122 | ret = -EFBIG; |
| 123 | goto fail; | 123 | goto fail; |
| 124 | } else if (csum_offset > csums_in_item) { | ||
| 125 | goto fail; | ||
| 124 | } | 126 | } |
| 125 | } | 127 | } |
| 126 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); | 128 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); |
| @@ -728,7 +730,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, | |||
| 728 | return -ENOMEM; | 730 | return -ENOMEM; |
| 729 | 731 | ||
| 730 | sector_sum = sums->sums; | 732 | sector_sum = sums->sums; |
| 731 | trans->adding_csums = 1; | ||
| 732 | again: | 733 | again: |
| 733 | next_offset = (u64)-1; | 734 | next_offset = (u64)-1; |
| 734 | found_next = 0; | 735 | found_next = 0; |
| @@ -899,7 +900,6 @@ next_sector: | |||
| 899 | goto again; | 900 | goto again; |
| 900 | } | 901 | } |
| 901 | out: | 902 | out: |
| 902 | trans->adding_csums = 0; | ||
| 903 | btrfs_free_path(path); | 903 | btrfs_free_path(path); |
| 904 | return ret; | 904 | return ret; |
| 905 | 905 | ||
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5b4ea5f55b8f..ade03e6f7bd2 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
| @@ -2142,6 +2142,7 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
| 2142 | { | 2142 | { |
| 2143 | struct inode *inode = file_inode(file); | 2143 | struct inode *inode = file_inode(file); |
| 2144 | struct extent_state *cached_state = NULL; | 2144 | struct extent_state *cached_state = NULL; |
| 2145 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 2145 | u64 cur_offset; | 2146 | u64 cur_offset; |
| 2146 | u64 last_byte; | 2147 | u64 last_byte; |
| 2147 | u64 alloc_start; | 2148 | u64 alloc_start; |
| @@ -2169,6 +2170,11 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
| 2169 | ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); | 2170 | ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); |
| 2170 | if (ret) | 2171 | if (ret) |
| 2171 | return ret; | 2172 | return ret; |
| 2173 | if (root->fs_info->quota_enabled) { | ||
| 2174 | ret = btrfs_qgroup_reserve(root, alloc_end - alloc_start); | ||
| 2175 | if (ret) | ||
| 2176 | goto out_reserve_fail; | ||
| 2177 | } | ||
| 2172 | 2178 | ||
| 2173 | /* | 2179 | /* |
| 2174 | * wait for ordered IO before we have any locks. We'll loop again | 2180 | * wait for ordered IO before we have any locks. We'll loop again |
| @@ -2272,6 +2278,9 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
| 2272 | &cached_state, GFP_NOFS); | 2278 | &cached_state, GFP_NOFS); |
| 2273 | out: | 2279 | out: |
| 2274 | mutex_unlock(&inode->i_mutex); | 2280 | mutex_unlock(&inode->i_mutex); |
| 2281 | if (root->fs_info->quota_enabled) | ||
| 2282 | btrfs_qgroup_free(root, alloc_end - alloc_start); | ||
| 2283 | out_reserve_fail: | ||
| 2275 | /* Let go of our reservation. */ | 2284 | /* Let go of our reservation. */ |
| 2276 | btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); | 2285 | btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); |
| 2277 | return ret; | 2286 | return ret; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ca1b767d51f7..09c58a35b429 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -353,6 +353,7 @@ static noinline int compress_file_range(struct inode *inode, | |||
| 353 | int i; | 353 | int i; |
| 354 | int will_compress; | 354 | int will_compress; |
| 355 | int compress_type = root->fs_info->compress_type; | 355 | int compress_type = root->fs_info->compress_type; |
| 356 | int redirty = 0; | ||
| 356 | 357 | ||
| 357 | /* if this is a small write inside eof, kick off a defrag */ | 358 | /* if this is a small write inside eof, kick off a defrag */ |
| 358 | if ((end - start + 1) < 16 * 1024 && | 359 | if ((end - start + 1) < 16 * 1024 && |
| @@ -415,6 +416,17 @@ again: | |||
| 415 | if (BTRFS_I(inode)->force_compress) | 416 | if (BTRFS_I(inode)->force_compress) |
| 416 | compress_type = BTRFS_I(inode)->force_compress; | 417 | compress_type = BTRFS_I(inode)->force_compress; |
| 417 | 418 | ||
| 419 | /* | ||
| 420 | * we need to call clear_page_dirty_for_io on each | ||
| 421 | * page in the range. Otherwise applications with the file | ||
| 422 | * mmap'd can wander in and change the page contents while | ||
| 423 | * we are compressing them. | ||
| 424 | * | ||
| 425 | * If the compression fails for any reason, we set the pages | ||
| 426 | * dirty again later on. | ||
| 427 | */ | ||
| 428 | extent_range_clear_dirty_for_io(inode, start, end); | ||
| 429 | redirty = 1; | ||
| 418 | ret = btrfs_compress_pages(compress_type, | 430 | ret = btrfs_compress_pages(compress_type, |
| 419 | inode->i_mapping, start, | 431 | inode->i_mapping, start, |
| 420 | total_compressed, pages, | 432 | total_compressed, pages, |
| @@ -554,6 +566,8 @@ cleanup_and_bail_uncompressed: | |||
| 554 | __set_page_dirty_nobuffers(locked_page); | 566 | __set_page_dirty_nobuffers(locked_page); |
| 555 | /* unlocked later on in the async handlers */ | 567 | /* unlocked later on in the async handlers */ |
| 556 | } | 568 | } |
| 569 | if (redirty) | ||
| 570 | extent_range_redirty_for_io(inode, start, end); | ||
| 557 | add_async_extent(async_cow, start, end - start + 1, | 571 | add_async_extent(async_cow, start, end - start + 1, |
| 558 | 0, NULL, 0, BTRFS_COMPRESS_NONE); | 572 | 0, NULL, 0, BTRFS_COMPRESS_NONE); |
| 559 | *num_added += 1; | 573 | *num_added += 1; |
| @@ -1743,8 +1757,10 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, | |||
| 1743 | struct btrfs_ordered_sum *sum; | 1757 | struct btrfs_ordered_sum *sum; |
| 1744 | 1758 | ||
| 1745 | list_for_each_entry(sum, list, list) { | 1759 | list_for_each_entry(sum, list, list) { |
| 1760 | trans->adding_csums = 1; | ||
| 1746 | btrfs_csum_file_blocks(trans, | 1761 | btrfs_csum_file_blocks(trans, |
| 1747 | BTRFS_I(inode)->root->fs_info->csum_root, sum); | 1762 | BTRFS_I(inode)->root->fs_info->csum_root, sum); |
| 1763 | trans->adding_csums = 0; | ||
| 1748 | } | 1764 | } |
| 1749 | return 0; | 1765 | return 0; |
| 1750 | } | 1766 | } |
| @@ -3679,11 +3695,9 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | |||
| 3679 | * 1 for the dir item | 3695 | * 1 for the dir item |
| 3680 | * 1 for the dir index | 3696 | * 1 for the dir index |
| 3681 | * 1 for the inode ref | 3697 | * 1 for the inode ref |
| 3682 | * 1 for the inode ref in the tree log | ||
| 3683 | * 2 for the dir entries in the log | ||
| 3684 | * 1 for the inode | 3698 | * 1 for the inode |
| 3685 | */ | 3699 | */ |
| 3686 | trans = btrfs_start_transaction(root, 8); | 3700 | trans = btrfs_start_transaction(root, 5); |
| 3687 | if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) | 3701 | if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) |
| 3688 | return trans; | 3702 | return trans; |
| 3689 | 3703 | ||
| @@ -8127,7 +8141,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 8127 | * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items | 8141 | * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items |
| 8128 | * should cover the worst case number of items we'll modify. | 8142 | * should cover the worst case number of items we'll modify. |
| 8129 | */ | 8143 | */ |
| 8130 | trans = btrfs_start_transaction(root, 20); | 8144 | trans = btrfs_start_transaction(root, 11); |
| 8131 | if (IS_ERR(trans)) { | 8145 | if (IS_ERR(trans)) { |
| 8132 | ret = PTR_ERR(trans); | 8146 | ret = PTR_ERR(trans); |
| 8133 | goto out_notrans; | 8147 | goto out_notrans; |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index dc08d77b717e..005c45db699e 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
| @@ -557,6 +557,7 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) | |||
| 557 | INIT_LIST_HEAD(&splice); | 557 | INIT_LIST_HEAD(&splice); |
| 558 | INIT_LIST_HEAD(&works); | 558 | INIT_LIST_HEAD(&works); |
| 559 | 559 | ||
| 560 | mutex_lock(&root->fs_info->ordered_operations_mutex); | ||
| 560 | spin_lock(&root->fs_info->ordered_extent_lock); | 561 | spin_lock(&root->fs_info->ordered_extent_lock); |
| 561 | list_splice_init(&root->fs_info->ordered_extents, &splice); | 562 | list_splice_init(&root->fs_info->ordered_extents, &splice); |
| 562 | while (!list_empty(&splice)) { | 563 | while (!list_empty(&splice)) { |
| @@ -600,6 +601,7 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) | |||
| 600 | 601 | ||
| 601 | cond_resched(); | 602 | cond_resched(); |
| 602 | } | 603 | } |
| 604 | mutex_unlock(&root->fs_info->ordered_operations_mutex); | ||
| 603 | } | 605 | } |
| 604 | 606 | ||
| 605 | /* | 607 | /* |
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 5471e47d6559..b44124dd2370 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
| @@ -1153,7 +1153,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
| 1153 | ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, | 1153 | ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, |
| 1154 | sgn > 0 ? node->seq - 1 : node->seq, &roots); | 1154 | sgn > 0 ? node->seq - 1 : node->seq, &roots); |
| 1155 | if (ret < 0) | 1155 | if (ret < 0) |
| 1156 | goto out; | 1156 | return ret; |
| 1157 | 1157 | ||
| 1158 | spin_lock(&fs_info->qgroup_lock); | 1158 | spin_lock(&fs_info->qgroup_lock); |
| 1159 | quota_root = fs_info->quota_root; | 1159 | quota_root = fs_info->quota_root; |
| @@ -1275,7 +1275,6 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
| 1275 | ret = 0; | 1275 | ret = 0; |
| 1276 | unlock: | 1276 | unlock: |
| 1277 | spin_unlock(&fs_info->qgroup_lock); | 1277 | spin_unlock(&fs_info->qgroup_lock); |
| 1278 | out: | ||
| 1279 | ulist_free(roots); | 1278 | ulist_free(roots); |
| 1280 | ulist_free(tmp); | 1279 | ulist_free(tmp); |
| 1281 | 1280 | ||
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 53c3501fa4ca..85e072b956d5 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
| @@ -542,7 +542,6 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
| 542 | eb = path->nodes[0]; | 542 | eb = path->nodes[0]; |
| 543 | ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); | 543 | ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); |
| 544 | item_size = btrfs_item_size_nr(eb, path->slots[0]); | 544 | item_size = btrfs_item_size_nr(eb, path->slots[0]); |
| 545 | btrfs_release_path(path); | ||
| 546 | 545 | ||
| 547 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 546 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
| 548 | do { | 547 | do { |
| @@ -558,7 +557,9 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
| 558 | ret < 0 ? -1 : ref_level, | 557 | ret < 0 ? -1 : ref_level, |
| 559 | ret < 0 ? -1 : ref_root); | 558 | ret < 0 ? -1 : ref_root); |
| 560 | } while (ret != 1); | 559 | } while (ret != 1); |
| 560 | btrfs_release_path(path); | ||
| 561 | } else { | 561 | } else { |
| 562 | btrfs_release_path(path); | ||
| 562 | swarn.path = path; | 563 | swarn.path = path; |
| 563 | swarn.dev = dev; | 564 | swarn.dev = dev; |
| 564 | iterate_extent_inodes(fs_info, found_key.objectid, | 565 | iterate_extent_inodes(fs_info, found_key.objectid, |
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index f7a8b861058b..c85e7c6b4598 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
| @@ -3945,12 +3945,10 @@ static int is_extent_unchanged(struct send_ctx *sctx, | |||
| 3945 | found_key.type != key.type) { | 3945 | found_key.type != key.type) { |
| 3946 | key.offset += right_len; | 3946 | key.offset += right_len; |
| 3947 | break; | 3947 | break; |
| 3948 | } else { | 3948 | } |
| 3949 | if (found_key.offset != key.offset + right_len) { | 3949 | if (found_key.offset != key.offset + right_len) { |
| 3950 | /* Should really not happen */ | 3950 | ret = 0; |
| 3951 | ret = -EIO; | 3951 | goto out; |
| 3952 | goto out; | ||
| 3953 | } | ||
| 3954 | } | 3952 | } |
| 3955 | key = found_key; | 3953 | key = found_key; |
| 3956 | } | 3954 | } |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5989a92236f7..2854c824ab64 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
| @@ -4935,7 +4935,18 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | |||
| 4935 | em = lookup_extent_mapping(em_tree, chunk_start, 1); | 4935 | em = lookup_extent_mapping(em_tree, chunk_start, 1); |
| 4936 | read_unlock(&em_tree->lock); | 4936 | read_unlock(&em_tree->lock); |
| 4937 | 4937 | ||
| 4938 | BUG_ON(!em || em->start != chunk_start); | 4938 | if (!em) { |
| 4939 | printk(KERN_ERR "btrfs: couldn't find em for chunk %Lu\n", | ||
| 4940 | chunk_start); | ||
| 4941 | return -EIO; | ||
| 4942 | } | ||
| 4943 | |||
| 4944 | if (em->start != chunk_start) { | ||
| 4945 | printk(KERN_ERR "btrfs: bad chunk start, em=%Lu, wanted=%Lu\n", | ||
| 4946 | em->start, chunk_start); | ||
| 4947 | free_extent_map(em); | ||
| 4948 | return -EIO; | ||
| 4949 | } | ||
| 4939 | map = (struct map_lookup *)em->bdev; | 4950 | map = (struct map_lookup *)em->bdev; |
| 4940 | 4951 | ||
| 4941 | length = em->len; | 4952 | length = em->len; |
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index cfd1ce34e0bc..1d36db114772 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c | |||
| @@ -614,53 +614,10 @@ decode_negTokenInit(unsigned char *security_blob, int length, | |||
| 614 | } | 614 | } |
| 615 | } | 615 | } |
| 616 | 616 | ||
| 617 | /* mechlistMIC */ | 617 | /* |
| 618 | if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { | 618 | * We currently ignore anything at the end of the SPNEGO blob after |
| 619 | /* Check if we have reached the end of the blob, but with | 619 | * the mechTypes have been parsed, since none of that info is |
| 620 | no mechListMic (e.g. NTLMSSP instead of KRB5) */ | 620 | * used at the moment. |
| 621 | if (ctx.error == ASN1_ERR_DEC_EMPTY) | 621 | */ |
| 622 | goto decode_negtoken_exit; | ||
| 623 | cFYI(1, "Error decoding last part negTokenInit exit3"); | ||
| 624 | return 0; | ||
| 625 | } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { | ||
| 626 | /* tag = 3 indicating mechListMIC */ | ||
| 627 | cFYI(1, "Exit 4 cls = %d con = %d tag = %d end = %p (%d)", | ||
| 628 | cls, con, tag, end, *end); | ||
| 629 | return 0; | ||
| 630 | } | ||
| 631 | |||
| 632 | /* sequence */ | ||
| 633 | if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { | ||
| 634 | cFYI(1, "Error decoding last part negTokenInit exit5"); | ||
| 635 | return 0; | ||
| 636 | } else if ((cls != ASN1_UNI) || (con != ASN1_CON) | ||
| 637 | || (tag != ASN1_SEQ)) { | ||
| 638 | cFYI(1, "cls = %d con = %d tag = %d end = %p (%d)", | ||
| 639 | cls, con, tag, end, *end); | ||
| 640 | } | ||
| 641 | |||
| 642 | /* sequence of */ | ||
| 643 | if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { | ||
| 644 | cFYI(1, "Error decoding last part negTokenInit exit 7"); | ||
| 645 | return 0; | ||
| 646 | } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { | ||
| 647 | cFYI(1, "Exit 8 cls = %d con = %d tag = %d end = %p (%d)", | ||
| 648 | cls, con, tag, end, *end); | ||
| 649 | return 0; | ||
| 650 | } | ||
| 651 | |||
| 652 | /* general string */ | ||
| 653 | if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { | ||
| 654 | cFYI(1, "Error decoding last part negTokenInit exit9"); | ||
| 655 | return 0; | ||
| 656 | } else if ((cls != ASN1_UNI) || (con != ASN1_PRI) | ||
| 657 | || (tag != ASN1_GENSTR)) { | ||
| 658 | cFYI(1, "Exit10 cls = %d con = %d tag = %d end = %p (%d)", | ||
| 659 | cls, con, tag, end, *end); | ||
| 660 | return 0; | ||
| 661 | } | ||
| 662 | cFYI(1, "Need to call asn1_octets_decode() function for %s", | ||
| 663 | ctx.pointer); /* is this UTF-8 or ASCII? */ | ||
| 664 | decode_negtoken_exit: | ||
| 665 | return 1; | 622 | return 1; |
| 666 | } | 623 | } |
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 3cf8a15af916..345fc89c4286 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
| @@ -91,6 +91,30 @@ struct workqueue_struct *cifsiod_wq; | |||
| 91 | __u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE]; | 91 | __u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE]; |
| 92 | #endif | 92 | #endif |
| 93 | 93 | ||
| 94 | /* | ||
| 95 | * Bumps refcount for cifs super block. | ||
| 96 | * Note that it should be only called if a reference to VFS super block is | ||
| 97 | * already held, e.g. in open-type syscalls context. Otherwise it can race with | ||
| 98 | * atomic_dec_and_test in deactivate_locked_super. | ||
| 99 | */ | ||
| 100 | void | ||
| 101 | cifs_sb_active(struct super_block *sb) | ||
| 102 | { | ||
| 103 | struct cifs_sb_info *server = CIFS_SB(sb); | ||
| 104 | |||
| 105 | if (atomic_inc_return(&server->active) == 1) | ||
| 106 | atomic_inc(&sb->s_active); | ||
| 107 | } | ||
| 108 | |||
| 109 | void | ||
| 110 | cifs_sb_deactive(struct super_block *sb) | ||
| 111 | { | ||
| 112 | struct cifs_sb_info *server = CIFS_SB(sb); | ||
| 113 | |||
| 114 | if (atomic_dec_and_test(&server->active)) | ||
| 115 | deactivate_super(sb); | ||
| 116 | } | ||
| 117 | |||
| 94 | static int | 118 | static int |
| 95 | cifs_read_super(struct super_block *sb) | 119 | cifs_read_super(struct super_block *sb) |
| 96 | { | 120 | { |
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 7163419cecd9..0e32c3446ce9 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
| @@ -41,6 +41,10 @@ extern struct file_system_type cifs_fs_type; | |||
| 41 | extern const struct address_space_operations cifs_addr_ops; | 41 | extern const struct address_space_operations cifs_addr_ops; |
| 42 | extern const struct address_space_operations cifs_addr_ops_smallbuf; | 42 | extern const struct address_space_operations cifs_addr_ops_smallbuf; |
| 43 | 43 | ||
| 44 | /* Functions related to super block operations */ | ||
| 45 | extern void cifs_sb_active(struct super_block *sb); | ||
| 46 | extern void cifs_sb_deactive(struct super_block *sb); | ||
| 47 | |||
| 44 | /* Functions related to inodes */ | 48 | /* Functions related to inodes */ |
| 45 | extern const struct inode_operations cifs_dir_inode_ops; | 49 | extern const struct inode_operations cifs_dir_inode_ops; |
| 46 | extern struct inode *cifs_root_iget(struct super_block *); | 50 | extern struct inode *cifs_root_iget(struct super_block *); |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 8c0d85577314..7a0dd99e4507 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
| @@ -300,6 +300,8 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, | |||
| 300 | INIT_WORK(&cfile->oplock_break, cifs_oplock_break); | 300 | INIT_WORK(&cfile->oplock_break, cifs_oplock_break); |
| 301 | mutex_init(&cfile->fh_mutex); | 301 | mutex_init(&cfile->fh_mutex); |
| 302 | 302 | ||
| 303 | cifs_sb_active(inode->i_sb); | ||
| 304 | |||
| 303 | /* | 305 | /* |
| 304 | * If the server returned a read oplock and we have mandatory brlocks, | 306 | * If the server returned a read oplock and we have mandatory brlocks, |
| 305 | * set oplock level to None. | 307 | * set oplock level to None. |
| @@ -349,7 +351,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) | |||
| 349 | struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); | 351 | struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); |
| 350 | struct TCP_Server_Info *server = tcon->ses->server; | 352 | struct TCP_Server_Info *server = tcon->ses->server; |
| 351 | struct cifsInodeInfo *cifsi = CIFS_I(inode); | 353 | struct cifsInodeInfo *cifsi = CIFS_I(inode); |
| 352 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | 354 | struct super_block *sb = inode->i_sb; |
| 355 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); | ||
| 353 | struct cifsLockInfo *li, *tmp; | 356 | struct cifsLockInfo *li, *tmp; |
| 354 | struct cifs_fid fid; | 357 | struct cifs_fid fid; |
| 355 | struct cifs_pending_open open; | 358 | struct cifs_pending_open open; |
| @@ -414,6 +417,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) | |||
| 414 | 417 | ||
| 415 | cifs_put_tlink(cifs_file->tlink); | 418 | cifs_put_tlink(cifs_file->tlink); |
| 416 | dput(cifs_file->dentry); | 419 | dput(cifs_file->dentry); |
| 420 | cifs_sb_deactive(sb); | ||
| 417 | kfree(cifs_file); | 421 | kfree(cifs_file); |
| 418 | } | 422 | } |
| 419 | 423 | ||
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 0079696305c9..20887bf63121 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
| @@ -1043,7 +1043,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry, | |||
| 1043 | cifs_sb->mnt_cifs_flags & | 1043 | cifs_sb->mnt_cifs_flags & |
| 1044 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 1044 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
| 1045 | if (rc != 0) { | 1045 | if (rc != 0) { |
| 1046 | rc = -ETXTBSY; | 1046 | rc = -EBUSY; |
| 1047 | goto undo_setattr; | 1047 | goto undo_setattr; |
| 1048 | } | 1048 | } |
| 1049 | 1049 | ||
| @@ -1062,7 +1062,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry, | |||
| 1062 | if (rc == -ENOENT) | 1062 | if (rc == -ENOENT) |
| 1063 | rc = 0; | 1063 | rc = 0; |
| 1064 | else if (rc != 0) { | 1064 | else if (rc != 0) { |
| 1065 | rc = -ETXTBSY; | 1065 | rc = -EBUSY; |
| 1066 | goto undo_rename; | 1066 | goto undo_rename; |
| 1067 | } | 1067 | } |
| 1068 | cifsInode->delete_pending = true; | 1068 | cifsInode->delete_pending = true; |
| @@ -1169,15 +1169,13 @@ psx_del_no_retry: | |||
| 1169 | cifs_drop_nlink(inode); | 1169 | cifs_drop_nlink(inode); |
| 1170 | } else if (rc == -ENOENT) { | 1170 | } else if (rc == -ENOENT) { |
| 1171 | d_drop(dentry); | 1171 | d_drop(dentry); |
| 1172 | } else if (rc == -ETXTBSY) { | 1172 | } else if (rc == -EBUSY) { |
| 1173 | if (server->ops->rename_pending_delete) { | 1173 | if (server->ops->rename_pending_delete) { |
| 1174 | rc = server->ops->rename_pending_delete(full_path, | 1174 | rc = server->ops->rename_pending_delete(full_path, |
| 1175 | dentry, xid); | 1175 | dentry, xid); |
| 1176 | if (rc == 0) | 1176 | if (rc == 0) |
| 1177 | cifs_drop_nlink(inode); | 1177 | cifs_drop_nlink(inode); |
| 1178 | } | 1178 | } |
| 1179 | if (rc == -ETXTBSY) | ||
| 1180 | rc = -EBUSY; | ||
| 1181 | } else if ((rc == -EACCES) && (dosattr == 0) && inode) { | 1179 | } else if ((rc == -EACCES) && (dosattr == 0) && inode) { |
| 1182 | attrs = kzalloc(sizeof(*attrs), GFP_KERNEL); | 1180 | attrs = kzalloc(sizeof(*attrs), GFP_KERNEL); |
| 1183 | if (attrs == NULL) { | 1181 | if (attrs == NULL) { |
| @@ -1518,7 +1516,7 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry, | |||
| 1518 | * source. Note that cross directory moves do not work with | 1516 | * source. Note that cross directory moves do not work with |
| 1519 | * rename by filehandle to various Windows servers. | 1517 | * rename by filehandle to various Windows servers. |
| 1520 | */ | 1518 | */ |
| 1521 | if (rc == 0 || rc != -ETXTBSY) | 1519 | if (rc == 0 || rc != -EBUSY) |
| 1522 | goto do_rename_exit; | 1520 | goto do_rename_exit; |
| 1523 | 1521 | ||
| 1524 | /* open-file renames don't work across directories */ | 1522 | /* open-file renames don't work across directories */ |
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index a82bc51fdc82..c0b25b28be6c 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c | |||
| @@ -62,7 +62,7 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = { | |||
| 62 | {ERRdiffdevice, -EXDEV}, | 62 | {ERRdiffdevice, -EXDEV}, |
| 63 | {ERRnofiles, -ENOENT}, | 63 | {ERRnofiles, -ENOENT}, |
| 64 | {ERRwriteprot, -EROFS}, | 64 | {ERRwriteprot, -EROFS}, |
| 65 | {ERRbadshare, -ETXTBSY}, | 65 | {ERRbadshare, -EBUSY}, |
| 66 | {ERRlock, -EACCES}, | 66 | {ERRlock, -EACCES}, |
| 67 | {ERRunsup, -EINVAL}, | 67 | {ERRunsup, -EINVAL}, |
| 68 | {ERRnosuchshare, -ENXIO}, | 68 | {ERRnosuchshare, -ENXIO}, |
diff --git a/fs/dcache.c b/fs/dcache.c index fbfae008ba44..e8bc3420d63e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
| @@ -2542,7 +2542,6 @@ static int prepend_path(const struct path *path, | |||
| 2542 | bool slash = false; | 2542 | bool slash = false; |
| 2543 | int error = 0; | 2543 | int error = 0; |
| 2544 | 2544 | ||
| 2545 | br_read_lock(&vfsmount_lock); | ||
| 2546 | while (dentry != root->dentry || vfsmnt != root->mnt) { | 2545 | while (dentry != root->dentry || vfsmnt != root->mnt) { |
| 2547 | struct dentry * parent; | 2546 | struct dentry * parent; |
| 2548 | 2547 | ||
| @@ -2572,8 +2571,6 @@ static int prepend_path(const struct path *path, | |||
| 2572 | if (!error && !slash) | 2571 | if (!error && !slash) |
| 2573 | error = prepend(buffer, buflen, "/", 1); | 2572 | error = prepend(buffer, buflen, "/", 1); |
| 2574 | 2573 | ||
| 2575 | out: | ||
| 2576 | br_read_unlock(&vfsmount_lock); | ||
| 2577 | return error; | 2574 | return error; |
| 2578 | 2575 | ||
| 2579 | global_root: | 2576 | global_root: |
| @@ -2590,7 +2587,7 @@ global_root: | |||
| 2590 | error = prepend(buffer, buflen, "/", 1); | 2587 | error = prepend(buffer, buflen, "/", 1); |
| 2591 | if (!error) | 2588 | if (!error) |
| 2592 | error = is_mounted(vfsmnt) ? 1 : 2; | 2589 | error = is_mounted(vfsmnt) ? 1 : 2; |
| 2593 | goto out; | 2590 | return error; |
| 2594 | } | 2591 | } |
| 2595 | 2592 | ||
| 2596 | /** | 2593 | /** |
| @@ -2617,9 +2614,11 @@ char *__d_path(const struct path *path, | |||
| 2617 | int error; | 2614 | int error; |
| 2618 | 2615 | ||
| 2619 | prepend(&res, &buflen, "\0", 1); | 2616 | prepend(&res, &buflen, "\0", 1); |
| 2617 | br_read_lock(&vfsmount_lock); | ||
| 2620 | write_seqlock(&rename_lock); | 2618 | write_seqlock(&rename_lock); |
| 2621 | error = prepend_path(path, root, &res, &buflen); | 2619 | error = prepend_path(path, root, &res, &buflen); |
| 2622 | write_sequnlock(&rename_lock); | 2620 | write_sequnlock(&rename_lock); |
| 2621 | br_read_unlock(&vfsmount_lock); | ||
| 2623 | 2622 | ||
| 2624 | if (error < 0) | 2623 | if (error < 0) |
| 2625 | return ERR_PTR(error); | 2624 | return ERR_PTR(error); |
| @@ -2636,9 +2635,11 @@ char *d_absolute_path(const struct path *path, | |||
| 2636 | int error; | 2635 | int error; |
| 2637 | 2636 | ||
| 2638 | prepend(&res, &buflen, "\0", 1); | 2637 | prepend(&res, &buflen, "\0", 1); |
| 2638 | br_read_lock(&vfsmount_lock); | ||
| 2639 | write_seqlock(&rename_lock); | 2639 | write_seqlock(&rename_lock); |
| 2640 | error = prepend_path(path, &root, &res, &buflen); | 2640 | error = prepend_path(path, &root, &res, &buflen); |
| 2641 | write_sequnlock(&rename_lock); | 2641 | write_sequnlock(&rename_lock); |
| 2642 | br_read_unlock(&vfsmount_lock); | ||
| 2642 | 2643 | ||
| 2643 | if (error > 1) | 2644 | if (error > 1) |
| 2644 | error = -EINVAL; | 2645 | error = -EINVAL; |
| @@ -2702,11 +2703,13 @@ char *d_path(const struct path *path, char *buf, int buflen) | |||
| 2702 | return path->dentry->d_op->d_dname(path->dentry, buf, buflen); | 2703 | return path->dentry->d_op->d_dname(path->dentry, buf, buflen); |
| 2703 | 2704 | ||
| 2704 | get_fs_root(current->fs, &root); | 2705 | get_fs_root(current->fs, &root); |
| 2706 | br_read_lock(&vfsmount_lock); | ||
| 2705 | write_seqlock(&rename_lock); | 2707 | write_seqlock(&rename_lock); |
| 2706 | error = path_with_deleted(path, &root, &res, &buflen); | 2708 | error = path_with_deleted(path, &root, &res, &buflen); |
| 2709 | write_sequnlock(&rename_lock); | ||
| 2710 | br_read_unlock(&vfsmount_lock); | ||
| 2707 | if (error < 0) | 2711 | if (error < 0) |
| 2708 | res = ERR_PTR(error); | 2712 | res = ERR_PTR(error); |
| 2709 | write_sequnlock(&rename_lock); | ||
| 2710 | path_put(&root); | 2713 | path_put(&root); |
| 2711 | return res; | 2714 | return res; |
| 2712 | } | 2715 | } |
| @@ -2830,6 +2833,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) | |||
| 2830 | get_fs_root_and_pwd(current->fs, &root, &pwd); | 2833 | get_fs_root_and_pwd(current->fs, &root, &pwd); |
| 2831 | 2834 | ||
| 2832 | error = -ENOENT; | 2835 | error = -ENOENT; |
| 2836 | br_read_lock(&vfsmount_lock); | ||
| 2833 | write_seqlock(&rename_lock); | 2837 | write_seqlock(&rename_lock); |
| 2834 | if (!d_unlinked(pwd.dentry)) { | 2838 | if (!d_unlinked(pwd.dentry)) { |
| 2835 | unsigned long len; | 2839 | unsigned long len; |
| @@ -2839,6 +2843,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) | |||
| 2839 | prepend(&cwd, &buflen, "\0", 1); | 2843 | prepend(&cwd, &buflen, "\0", 1); |
| 2840 | error = prepend_path(&pwd, &root, &cwd, &buflen); | 2844 | error = prepend_path(&pwd, &root, &cwd, &buflen); |
| 2841 | write_sequnlock(&rename_lock); | 2845 | write_sequnlock(&rename_lock); |
| 2846 | br_read_unlock(&vfsmount_lock); | ||
| 2842 | 2847 | ||
| 2843 | if (error < 0) | 2848 | if (error < 0) |
| 2844 | goto out; | 2849 | goto out; |
| @@ -2859,6 +2864,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) | |||
| 2859 | } | 2864 | } |
| 2860 | } else { | 2865 | } else { |
| 2861 | write_sequnlock(&rename_lock); | 2866 | write_sequnlock(&rename_lock); |
| 2867 | br_read_unlock(&vfsmount_lock); | ||
| 2862 | } | 2868 | } |
| 2863 | 2869 | ||
| 2864 | out: | 2870 | out: |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 4a01ba315262..3b83cd604796 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
| @@ -335,9 +335,9 @@ struct ext4_group_desc | |||
| 335 | */ | 335 | */ |
| 336 | 336 | ||
| 337 | struct flex_groups { | 337 | struct flex_groups { |
| 338 | atomic_t free_inodes; | 338 | atomic64_t free_clusters; |
| 339 | atomic_t free_clusters; | 339 | atomic_t free_inodes; |
| 340 | atomic_t used_dirs; | 340 | atomic_t used_dirs; |
| 341 | }; | 341 | }; |
| 342 | 342 | ||
| 343 | #define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ | 343 | #define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ |
| @@ -2617,7 +2617,7 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
| 2617 | extern int __init ext4_init_pageio(void); | 2617 | extern int __init ext4_init_pageio(void); |
| 2618 | extern void ext4_add_complete_io(ext4_io_end_t *io_end); | 2618 | extern void ext4_add_complete_io(ext4_io_end_t *io_end); |
| 2619 | extern void ext4_exit_pageio(void); | 2619 | extern void ext4_exit_pageio(void); |
| 2620 | extern void ext4_ioend_wait(struct inode *); | 2620 | extern void ext4_ioend_shutdown(struct inode *); |
| 2621 | extern void ext4_free_io_end(ext4_io_end_t *io); | 2621 | extern void ext4_free_io_end(ext4_io_end_t *io); |
| 2622 | extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); | 2622 | extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); |
| 2623 | extern void ext4_end_io_work(struct work_struct *work); | 2623 | extern void ext4_end_io_work(struct work_struct *work); |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 28dd8eeea6a9..9c6d06dcef8b 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
| @@ -1584,10 +1584,12 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, | |||
| 1584 | unsigned short ext1_ee_len, ext2_ee_len, max_len; | 1584 | unsigned short ext1_ee_len, ext2_ee_len, max_len; |
| 1585 | 1585 | ||
| 1586 | /* | 1586 | /* |
| 1587 | * Make sure that either both extents are uninitialized, or | 1587 | * Make sure that both extents are initialized. We don't merge |
| 1588 | * both are _not_. | 1588 | * uninitialized extents so that we can be sure that end_io code has |
| 1589 | * the extent that was written properly split out and conversion to | ||
| 1590 | * initialized is trivial. | ||
| 1589 | */ | 1591 | */ |
| 1590 | if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2)) | 1592 | if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2)) |
| 1591 | return 0; | 1593 | return 0; |
| 1592 | 1594 | ||
| 1593 | if (ext4_ext_is_uninitialized(ex1)) | 1595 | if (ext4_ext_is_uninitialized(ex1)) |
| @@ -2923,7 +2925,7 @@ static int ext4_split_extent_at(handle_t *handle, | |||
| 2923 | { | 2925 | { |
| 2924 | ext4_fsblk_t newblock; | 2926 | ext4_fsblk_t newblock; |
| 2925 | ext4_lblk_t ee_block; | 2927 | ext4_lblk_t ee_block; |
| 2926 | struct ext4_extent *ex, newex, orig_ex; | 2928 | struct ext4_extent *ex, newex, orig_ex, zero_ex; |
| 2927 | struct ext4_extent *ex2 = NULL; | 2929 | struct ext4_extent *ex2 = NULL; |
| 2928 | unsigned int ee_len, depth; | 2930 | unsigned int ee_len, depth; |
| 2929 | int err = 0; | 2931 | int err = 0; |
| @@ -2943,6 +2945,10 @@ static int ext4_split_extent_at(handle_t *handle, | |||
| 2943 | newblock = split - ee_block + ext4_ext_pblock(ex); | 2945 | newblock = split - ee_block + ext4_ext_pblock(ex); |
| 2944 | 2946 | ||
| 2945 | BUG_ON(split < ee_block || split >= (ee_block + ee_len)); | 2947 | BUG_ON(split < ee_block || split >= (ee_block + ee_len)); |
| 2948 | BUG_ON(!ext4_ext_is_uninitialized(ex) && | ||
| 2949 | split_flag & (EXT4_EXT_MAY_ZEROOUT | | ||
| 2950 | EXT4_EXT_MARK_UNINIT1 | | ||
| 2951 | EXT4_EXT_MARK_UNINIT2)); | ||
| 2946 | 2952 | ||
| 2947 | err = ext4_ext_get_access(handle, inode, path + depth); | 2953 | err = ext4_ext_get_access(handle, inode, path + depth); |
| 2948 | if (err) | 2954 | if (err) |
| @@ -2990,12 +2996,29 @@ static int ext4_split_extent_at(handle_t *handle, | |||
| 2990 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); | 2996 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); |
| 2991 | if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { | 2997 | if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { |
| 2992 | if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { | 2998 | if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { |
| 2993 | if (split_flag & EXT4_EXT_DATA_VALID1) | 2999 | if (split_flag & EXT4_EXT_DATA_VALID1) { |
| 2994 | err = ext4_ext_zeroout(inode, ex2); | 3000 | err = ext4_ext_zeroout(inode, ex2); |
| 2995 | else | 3001 | zero_ex.ee_block = ex2->ee_block; |
| 3002 | zero_ex.ee_len = cpu_to_le16( | ||
| 3003 | ext4_ext_get_actual_len(ex2)); | ||
| 3004 | ext4_ext_store_pblock(&zero_ex, | ||
| 3005 | ext4_ext_pblock(ex2)); | ||
| 3006 | } else { | ||
| 2996 | err = ext4_ext_zeroout(inode, ex); | 3007 | err = ext4_ext_zeroout(inode, ex); |
| 2997 | } else | 3008 | zero_ex.ee_block = ex->ee_block; |
| 3009 | zero_ex.ee_len = cpu_to_le16( | ||
| 3010 | ext4_ext_get_actual_len(ex)); | ||
| 3011 | ext4_ext_store_pblock(&zero_ex, | ||
| 3012 | ext4_ext_pblock(ex)); | ||
| 3013 | } | ||
| 3014 | } else { | ||
| 2998 | err = ext4_ext_zeroout(inode, &orig_ex); | 3015 | err = ext4_ext_zeroout(inode, &orig_ex); |
| 3016 | zero_ex.ee_block = orig_ex.ee_block; | ||
| 3017 | zero_ex.ee_len = cpu_to_le16( | ||
| 3018 | ext4_ext_get_actual_len(&orig_ex)); | ||
| 3019 | ext4_ext_store_pblock(&zero_ex, | ||
| 3020 | ext4_ext_pblock(&orig_ex)); | ||
| 3021 | } | ||
| 2999 | 3022 | ||
| 3000 | if (err) | 3023 | if (err) |
| 3001 | goto fix_extent_len; | 3024 | goto fix_extent_len; |
| @@ -3003,6 +3026,12 @@ static int ext4_split_extent_at(handle_t *handle, | |||
| 3003 | ex->ee_len = cpu_to_le16(ee_len); | 3026 | ex->ee_len = cpu_to_le16(ee_len); |
| 3004 | ext4_ext_try_to_merge(handle, inode, path, ex); | 3027 | ext4_ext_try_to_merge(handle, inode, path, ex); |
| 3005 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); | 3028 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); |
| 3029 | if (err) | ||
| 3030 | goto fix_extent_len; | ||
| 3031 | |||
| 3032 | /* update extent status tree */ | ||
| 3033 | err = ext4_es_zeroout(inode, &zero_ex); | ||
| 3034 | |||
| 3006 | goto out; | 3035 | goto out; |
| 3007 | } else if (err) | 3036 | } else if (err) |
| 3008 | goto fix_extent_len; | 3037 | goto fix_extent_len; |
| @@ -3041,6 +3070,7 @@ static int ext4_split_extent(handle_t *handle, | |||
| 3041 | int err = 0; | 3070 | int err = 0; |
| 3042 | int uninitialized; | 3071 | int uninitialized; |
| 3043 | int split_flag1, flags1; | 3072 | int split_flag1, flags1; |
| 3073 | int allocated = map->m_len; | ||
| 3044 | 3074 | ||
| 3045 | depth = ext_depth(inode); | 3075 | depth = ext_depth(inode); |
| 3046 | ex = path[depth].p_ext; | 3076 | ex = path[depth].p_ext; |
| @@ -3060,20 +3090,29 @@ static int ext4_split_extent(handle_t *handle, | |||
| 3060 | map->m_lblk + map->m_len, split_flag1, flags1); | 3090 | map->m_lblk + map->m_len, split_flag1, flags1); |
| 3061 | if (err) | 3091 | if (err) |
| 3062 | goto out; | 3092 | goto out; |
| 3093 | } else { | ||
| 3094 | allocated = ee_len - (map->m_lblk - ee_block); | ||
| 3063 | } | 3095 | } |
| 3064 | 3096 | /* | |
| 3097 | * Update path is required because previous ext4_split_extent_at() may | ||
| 3098 | * result in split of original leaf or extent zeroout. | ||
| 3099 | */ | ||
| 3065 | ext4_ext_drop_refs(path); | 3100 | ext4_ext_drop_refs(path); |
| 3066 | path = ext4_ext_find_extent(inode, map->m_lblk, path); | 3101 | path = ext4_ext_find_extent(inode, map->m_lblk, path); |
| 3067 | if (IS_ERR(path)) | 3102 | if (IS_ERR(path)) |
| 3068 | return PTR_ERR(path); | 3103 | return PTR_ERR(path); |
| 3104 | depth = ext_depth(inode); | ||
| 3105 | ex = path[depth].p_ext; | ||
| 3106 | uninitialized = ext4_ext_is_uninitialized(ex); | ||
| 3107 | split_flag1 = 0; | ||
| 3069 | 3108 | ||
| 3070 | if (map->m_lblk >= ee_block) { | 3109 | if (map->m_lblk >= ee_block) { |
| 3071 | split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT | | 3110 | split_flag1 = split_flag & EXT4_EXT_DATA_VALID2; |
| 3072 | EXT4_EXT_DATA_VALID2); | 3111 | if (uninitialized) { |
| 3073 | if (uninitialized) | ||
| 3074 | split_flag1 |= EXT4_EXT_MARK_UNINIT1; | 3112 | split_flag1 |= EXT4_EXT_MARK_UNINIT1; |
| 3075 | if (split_flag & EXT4_EXT_MARK_UNINIT2) | 3113 | split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT | |
| 3076 | split_flag1 |= EXT4_EXT_MARK_UNINIT2; | 3114 | EXT4_EXT_MARK_UNINIT2); |
| 3115 | } | ||
| 3077 | err = ext4_split_extent_at(handle, inode, path, | 3116 | err = ext4_split_extent_at(handle, inode, path, |
| 3078 | map->m_lblk, split_flag1, flags); | 3117 | map->m_lblk, split_flag1, flags); |
| 3079 | if (err) | 3118 | if (err) |
| @@ -3082,7 +3121,7 @@ static int ext4_split_extent(handle_t *handle, | |||
| 3082 | 3121 | ||
| 3083 | ext4_ext_show_leaf(inode, path); | 3122 | ext4_ext_show_leaf(inode, path); |
| 3084 | out: | 3123 | out: |
| 3085 | return err ? err : map->m_len; | 3124 | return err ? err : allocated; |
| 3086 | } | 3125 | } |
| 3087 | 3126 | ||
| 3088 | /* | 3127 | /* |
| @@ -3137,6 +3176,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
| 3137 | ee_block = le32_to_cpu(ex->ee_block); | 3176 | ee_block = le32_to_cpu(ex->ee_block); |
| 3138 | ee_len = ext4_ext_get_actual_len(ex); | 3177 | ee_len = ext4_ext_get_actual_len(ex); |
| 3139 | allocated = ee_len - (map->m_lblk - ee_block); | 3178 | allocated = ee_len - (map->m_lblk - ee_block); |
| 3179 | zero_ex.ee_len = 0; | ||
| 3140 | 3180 | ||
| 3141 | trace_ext4_ext_convert_to_initialized_enter(inode, map, ex); | 3181 | trace_ext4_ext_convert_to_initialized_enter(inode, map, ex); |
| 3142 | 3182 | ||
| @@ -3227,13 +3267,16 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
| 3227 | 3267 | ||
| 3228 | if (EXT4_EXT_MAY_ZEROOUT & split_flag) | 3268 | if (EXT4_EXT_MAY_ZEROOUT & split_flag) |
| 3229 | max_zeroout = sbi->s_extent_max_zeroout_kb >> | 3269 | max_zeroout = sbi->s_extent_max_zeroout_kb >> |
| 3230 | inode->i_sb->s_blocksize_bits; | 3270 | (inode->i_sb->s_blocksize_bits - 10); |
| 3231 | 3271 | ||
| 3232 | /* If extent is less than s_max_zeroout_kb, zeroout directly */ | 3272 | /* If extent is less than s_max_zeroout_kb, zeroout directly */ |
| 3233 | if (max_zeroout && (ee_len <= max_zeroout)) { | 3273 | if (max_zeroout && (ee_len <= max_zeroout)) { |
| 3234 | err = ext4_ext_zeroout(inode, ex); | 3274 | err = ext4_ext_zeroout(inode, ex); |
| 3235 | if (err) | 3275 | if (err) |
| 3236 | goto out; | 3276 | goto out; |
| 3277 | zero_ex.ee_block = ex->ee_block; | ||
| 3278 | zero_ex.ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)); | ||
| 3279 | ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex)); | ||
| 3237 | 3280 | ||
| 3238 | err = ext4_ext_get_access(handle, inode, path + depth); | 3281 | err = ext4_ext_get_access(handle, inode, path + depth); |
| 3239 | if (err) | 3282 | if (err) |
| @@ -3292,6 +3335,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
| 3292 | err = allocated; | 3335 | err = allocated; |
| 3293 | 3336 | ||
| 3294 | out: | 3337 | out: |
| 3338 | /* If we have gotten a failure, don't zero out status tree */ | ||
| 3339 | if (!err) | ||
| 3340 | err = ext4_es_zeroout(inode, &zero_ex); | ||
| 3295 | return err ? err : allocated; | 3341 | return err ? err : allocated; |
| 3296 | } | 3342 | } |
| 3297 | 3343 | ||
| @@ -3374,8 +3420,19 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, | |||
| 3374 | "block %llu, max_blocks %u\n", inode->i_ino, | 3420 | "block %llu, max_blocks %u\n", inode->i_ino, |
| 3375 | (unsigned long long)ee_block, ee_len); | 3421 | (unsigned long long)ee_block, ee_len); |
| 3376 | 3422 | ||
| 3377 | /* If extent is larger than requested then split is required */ | 3423 | /* If extent is larger than requested it is a clear sign that we still |
| 3424 | * have some extent state machine issues left. So extent_split is still | ||
| 3425 | * required. | ||
| 3426 | * TODO: Once all related issues will be fixed this situation should be | ||
| 3427 | * illegal. | ||
| 3428 | */ | ||
| 3378 | if (ee_block != map->m_lblk || ee_len > map->m_len) { | 3429 | if (ee_block != map->m_lblk || ee_len > map->m_len) { |
| 3430 | #ifdef EXT4_DEBUG | ||
| 3431 | ext4_warning("Inode (%ld) finished: extent logical block %llu," | ||
| 3432 | " len %u; IO logical block %llu, len %u\n", | ||
| 3433 | inode->i_ino, (unsigned long long)ee_block, ee_len, | ||
| 3434 | (unsigned long long)map->m_lblk, map->m_len); | ||
| 3435 | #endif | ||
| 3379 | err = ext4_split_unwritten_extents(handle, inode, map, path, | 3436 | err = ext4_split_unwritten_extents(handle, inode, map, path, |
| 3380 | EXT4_GET_BLOCKS_CONVERT); | 3437 | EXT4_GET_BLOCKS_CONVERT); |
| 3381 | if (err < 0) | 3438 | if (err < 0) |
| @@ -3626,6 +3683,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
| 3626 | path, map->m_len); | 3683 | path, map->m_len); |
| 3627 | } else | 3684 | } else |
| 3628 | err = ret; | 3685 | err = ret; |
| 3686 | map->m_flags |= EXT4_MAP_MAPPED; | ||
| 3687 | if (allocated > map->m_len) | ||
| 3688 | allocated = map->m_len; | ||
| 3689 | map->m_len = allocated; | ||
| 3629 | goto out2; | 3690 | goto out2; |
| 3630 | } | 3691 | } |
| 3631 | /* buffered IO case */ | 3692 | /* buffered IO case */ |
| @@ -3675,6 +3736,7 @@ out: | |||
| 3675 | allocated - map->m_len); | 3736 | allocated - map->m_len); |
| 3676 | allocated = map->m_len; | 3737 | allocated = map->m_len; |
| 3677 | } | 3738 | } |
| 3739 | map->m_len = allocated; | ||
| 3678 | 3740 | ||
| 3679 | /* | 3741 | /* |
| 3680 | * If we have done fallocate with the offset that is already | 3742 | * If we have done fallocate with the offset that is already |
| @@ -4106,9 +4168,6 @@ got_allocated_blocks: | |||
| 4106 | } | 4168 | } |
| 4107 | } else { | 4169 | } else { |
| 4108 | BUG_ON(allocated_clusters < reserved_clusters); | 4170 | BUG_ON(allocated_clusters < reserved_clusters); |
| 4109 | /* We will claim quota for all newly allocated blocks.*/ | ||
| 4110 | ext4_da_update_reserve_space(inode, allocated_clusters, | ||
| 4111 | 1); | ||
| 4112 | if (reserved_clusters < allocated_clusters) { | 4171 | if (reserved_clusters < allocated_clusters) { |
| 4113 | struct ext4_inode_info *ei = EXT4_I(inode); | 4172 | struct ext4_inode_info *ei = EXT4_I(inode); |
| 4114 | int reservation = allocated_clusters - | 4173 | int reservation = allocated_clusters - |
| @@ -4159,6 +4218,15 @@ got_allocated_blocks: | |||
| 4159 | ei->i_reserved_data_blocks += reservation; | 4218 | ei->i_reserved_data_blocks += reservation; |
| 4160 | spin_unlock(&ei->i_block_reservation_lock); | 4219 | spin_unlock(&ei->i_block_reservation_lock); |
| 4161 | } | 4220 | } |
| 4221 | /* | ||
| 4222 | * We will claim quota for all newly allocated blocks. | ||
| 4223 | * We're updating the reserved space *after* the | ||
| 4224 | * correction above so we do not accidentally free | ||
| 4225 | * all the metadata reservation because we might | ||
| 4226 | * actually need it later on. | ||
| 4227 | */ | ||
| 4228 | ext4_da_update_reserve_space(inode, allocated_clusters, | ||
| 4229 | 1); | ||
| 4162 | } | 4230 | } |
| 4163 | } | 4231 | } |
| 4164 | 4232 | ||
| @@ -4368,8 +4436,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
| 4368 | if (len <= EXT_UNINIT_MAX_LEN << blkbits) | 4436 | if (len <= EXT_UNINIT_MAX_LEN << blkbits) |
| 4369 | flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; | 4437 | flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; |
| 4370 | 4438 | ||
| 4371 | /* Prevent race condition between unwritten */ | ||
| 4372 | ext4_flush_unwritten_io(inode); | ||
| 4373 | retry: | 4439 | retry: |
| 4374 | while (ret >= 0 && ret < max_blocks) { | 4440 | while (ret >= 0 && ret < max_blocks) { |
| 4375 | map.m_lblk = map.m_lblk + ret; | 4441 | map.m_lblk = map.m_lblk + ret; |
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 95796a1b7522..fe3337a85ede 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c | |||
| @@ -333,17 +333,27 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) | |||
| 333 | static int ext4_es_can_be_merged(struct extent_status *es1, | 333 | static int ext4_es_can_be_merged(struct extent_status *es1, |
| 334 | struct extent_status *es2) | 334 | struct extent_status *es2) |
| 335 | { | 335 | { |
| 336 | if (es1->es_lblk + es1->es_len != es2->es_lblk) | 336 | if (ext4_es_status(es1) != ext4_es_status(es2)) |
| 337 | return 0; | 337 | return 0; |
| 338 | 338 | ||
| 339 | if (ext4_es_status(es1) != ext4_es_status(es2)) | 339 | if (((__u64) es1->es_len) + es2->es_len > 0xFFFFFFFFULL) |
| 340 | return 0; | 340 | return 0; |
| 341 | 341 | ||
| 342 | if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) && | 342 | if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk) |
| 343 | (ext4_es_pblock(es1) + es1->es_len != ext4_es_pblock(es2))) | ||
| 344 | return 0; | 343 | return 0; |
| 345 | 344 | ||
| 346 | return 1; | 345 | if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) && |
| 346 | (ext4_es_pblock(es1) + es1->es_len == ext4_es_pblock(es2))) | ||
| 347 | return 1; | ||
| 348 | |||
| 349 | if (ext4_es_is_hole(es1)) | ||
| 350 | return 1; | ||
| 351 | |||
| 352 | /* a delayed extent can be merged only if it is not also unwritten */ | ||
| 353 | if (ext4_es_is_delayed(es1) && !ext4_es_is_unwritten(es1)) | ||
| 354 | return 1; | ||
| 355 | |||
| 356 | return 0; | ||
| 347 | } | 357 | } |
| 348 | 358 | ||
| 349 | static struct extent_status * | 359 | static struct extent_status * |
| @@ -389,6 +399,179 @@ ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es) | |||
| 389 | return es; | 399 | return es; |
| 390 | } | 400 | } |
| 391 | 401 | ||
| 402 | #ifdef ES_AGGRESSIVE_TEST | ||
| 403 | static void ext4_es_insert_extent_ext_check(struct inode *inode, | ||
| 404 | struct extent_status *es) | ||
| 405 | { | ||
| 406 | struct ext4_ext_path *path = NULL; | ||
| 407 | struct ext4_extent *ex; | ||
| 408 | ext4_lblk_t ee_block; | ||
| 409 | ext4_fsblk_t ee_start; | ||
| 410 | unsigned short ee_len; | ||
| 411 | int depth, ee_status, es_status; | ||
| 412 | |||
| 413 | path = ext4_ext_find_extent(inode, es->es_lblk, NULL); | ||
| 414 | if (IS_ERR(path)) | ||
| 415 | return; | ||
| 416 | |||
| 417 | depth = ext_depth(inode); | ||
| 418 | ex = path[depth].p_ext; | ||
| 419 | |||
| 420 | if (ex) { | ||
| 421 | |||
| 422 | ee_block = le32_to_cpu(ex->ee_block); | ||
| 423 | ee_start = ext4_ext_pblock(ex); | ||
| 424 | ee_len = ext4_ext_get_actual_len(ex); | ||
| 425 | |||
| 426 | ee_status = ext4_ext_is_uninitialized(ex) ? 1 : 0; | ||
| 427 | es_status = ext4_es_is_unwritten(es) ? 1 : 0; | ||
| 428 | |||
| 429 | /* | ||
| 430 | * Make sure ex and es do not overlap when we try to insert | ||
| 431 | * a delayed/hole extent. | ||
| 432 | */ | ||
| 433 | if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) { | ||
| 434 | if (in_range(es->es_lblk, ee_block, ee_len)) { | ||
| 435 | pr_warn("ES insert assertation failed for " | ||
| 436 | "inode: %lu we can find an extent " | ||
| 437 | "at block [%d/%d/%llu/%c], but we " | ||
| 438 | "want to add an delayed/hole extent " | ||
| 439 | "[%d/%d/%llu/%llx]\n", | ||
| 440 | inode->i_ino, ee_block, ee_len, | ||
| 441 | ee_start, ee_status ? 'u' : 'w', | ||
| 442 | es->es_lblk, es->es_len, | ||
| 443 | ext4_es_pblock(es), ext4_es_status(es)); | ||
| 444 | } | ||
| 445 | goto out; | ||
| 446 | } | ||
| 447 | |||
| 448 | /* | ||
| 449 | * We don't check ee_block == es->es_lblk, etc. because es | ||
| 450 | * might be only a part of the whole extent, or vice versa. | ||
| 451 | */ | ||
| 452 | if (es->es_lblk < ee_block || | ||
| 453 | ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) { | ||
| 454 | pr_warn("ES insert assertation failed for inode: %lu " | ||
| 455 | "ex_status [%d/%d/%llu/%c] != " | ||
| 456 | "es_status [%d/%d/%llu/%c]\n", inode->i_ino, | ||
| 457 | ee_block, ee_len, ee_start, | ||
| 458 | ee_status ? 'u' : 'w', es->es_lblk, es->es_len, | ||
| 459 | ext4_es_pblock(es), es_status ? 'u' : 'w'); | ||
| 460 | goto out; | ||
| 461 | } | ||
| 462 | |||
| 463 | if (ee_status ^ es_status) { | ||
| 464 | pr_warn("ES insert assertation failed for inode: %lu " | ||
| 465 | "ex_status [%d/%d/%llu/%c] != " | ||
| 466 | "es_status [%d/%d/%llu/%c]\n", inode->i_ino, | ||
| 467 | ee_block, ee_len, ee_start, | ||
| 468 | ee_status ? 'u' : 'w', es->es_lblk, es->es_len, | ||
| 469 | ext4_es_pblock(es), es_status ? 'u' : 'w'); | ||
| 470 | } | ||
| 471 | } else { | ||
| 472 | /* | ||
| 473 | * We can't find an extent on disk. So we need to make sure | ||
| 474 | * that we are not adding a written/unwritten extent. | ||
| 475 | */ | ||
| 476 | if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) { | ||
| 477 | pr_warn("ES insert assertation failed for inode: %lu " | ||
| 478 | "can't find an extent at block %d but we want " | ||
| 479 | "to add an written/unwritten extent " | ||
| 480 | "[%d/%d/%llu/%llx]\n", inode->i_ino, | ||
| 481 | es->es_lblk, es->es_lblk, es->es_len, | ||
| 482 | ext4_es_pblock(es), ext4_es_status(es)); | ||
| 483 | } | ||
| 484 | } | ||
| 485 | out: | ||
| 486 | if (path) { | ||
| 487 | ext4_ext_drop_refs(path); | ||
| 488 | kfree(path); | ||
| 489 | } | ||
| 490 | } | ||
| 491 | |||
| 492 | static void ext4_es_insert_extent_ind_check(struct inode *inode, | ||
| 493 | struct extent_status *es) | ||
| 494 | { | ||
| 495 | struct ext4_map_blocks map; | ||
| 496 | int retval; | ||
| 497 | |||
| 498 | /* | ||
| 499 | * Here we call ext4_ind_map_blocks to look up a block mapping because | ||
| 500 | * the 'Indirect' structure is defined in indirect.c, so we cannot | ||
| 501 | * access the direct/indirect tree from outside, and it would be too | ||
| 502 | * dirty to define this function in indirect.c. | ||
| 503 | */ | ||
| 504 | |||
| 505 | map.m_lblk = es->es_lblk; | ||
| 506 | map.m_len = es->es_len; | ||
| 507 | |||
| 508 | retval = ext4_ind_map_blocks(NULL, inode, &map, 0); | ||
| 509 | if (retval > 0) { | ||
| 510 | if (ext4_es_is_delayed(es) || ext4_es_is_hole(es)) { | ||
| 511 | /* | ||
| 512 | * We want to add a delayed/hole extent but this | ||
| 513 | * block has been allocated. | ||
| 514 | */ | ||
| 515 | pr_warn("ES insert assertation failed for inode: %lu " | ||
| 516 | "We can find blocks but we want to add a " | ||
| 517 | "delayed/hole extent [%d/%d/%llu/%llx]\n", | ||
| 518 | inode->i_ino, es->es_lblk, es->es_len, | ||
| 519 | ext4_es_pblock(es), ext4_es_status(es)); | ||
| 520 | return; | ||
| 521 | } else if (ext4_es_is_written(es)) { | ||
| 522 | if (retval != es->es_len) { | ||
| 523 | pr_warn("ES insert assertation failed for " | ||
| 524 | "inode: %lu retval %d != es_len %d\n", | ||
| 525 | inode->i_ino, retval, es->es_len); | ||
| 526 | return; | ||
| 527 | } | ||
| 528 | if (map.m_pblk != ext4_es_pblock(es)) { | ||
| 529 | pr_warn("ES insert assertation failed for " | ||
| 530 | "inode: %lu m_pblk %llu != " | ||
| 531 | "es_pblk %llu\n", | ||
| 532 | inode->i_ino, map.m_pblk, | ||
| 533 | ext4_es_pblock(es)); | ||
| 534 | return; | ||
| 535 | } | ||
| 536 | } else { | ||
| 537 | /* | ||
| 538 | * We don't need to check unwritten extent because | ||
| 539 | * indirect-based file doesn't have it. | ||
| 540 | */ | ||
| 541 | BUG_ON(1); | ||
| 542 | } | ||
| 543 | } else if (retval == 0) { | ||
| 544 | if (ext4_es_is_written(es)) { | ||
| 545 | pr_warn("ES insert assertation failed for inode: %lu " | ||
| 546 | "We can't find the block but we want to add " | ||
| 547 | "an written extent [%d/%d/%llu/%llx]\n", | ||
| 548 | inode->i_ino, es->es_lblk, es->es_len, | ||
| 549 | ext4_es_pblock(es), ext4_es_status(es)); | ||
| 550 | return; | ||
| 551 | } | ||
| 552 | } | ||
| 553 | } | ||
| 554 | |||
| 555 | static inline void ext4_es_insert_extent_check(struct inode *inode, | ||
| 556 | struct extent_status *es) | ||
| 557 | { | ||
| 558 | /* | ||
| 559 | * We don't need to worry about a race condition because | ||
| 560 | * the caller holds i_data_sem. | ||
| 561 | */ | ||
| 562 | BUG_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem)); | ||
| 563 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | ||
| 564 | ext4_es_insert_extent_ext_check(inode, es); | ||
| 565 | else | ||
| 566 | ext4_es_insert_extent_ind_check(inode, es); | ||
| 567 | } | ||
| 568 | #else | ||
| 569 | static inline void ext4_es_insert_extent_check(struct inode *inode, | ||
| 570 | struct extent_status *es) | ||
| 571 | { | ||
| 572 | } | ||
| 573 | #endif | ||
| 574 | |||
| 392 | static int __es_insert_extent(struct inode *inode, struct extent_status *newes) | 575 | static int __es_insert_extent(struct inode *inode, struct extent_status *newes) |
| 393 | { | 576 | { |
| 394 | struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree; | 577 | struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree; |
| @@ -471,6 +654,8 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, | |||
| 471 | ext4_es_store_status(&newes, status); | 654 | ext4_es_store_status(&newes, status); |
| 472 | trace_ext4_es_insert_extent(inode, &newes); | 655 | trace_ext4_es_insert_extent(inode, &newes); |
| 473 | 656 | ||
| 657 | ext4_es_insert_extent_check(inode, &newes); | ||
| 658 | |||
| 474 | write_lock(&EXT4_I(inode)->i_es_lock); | 659 | write_lock(&EXT4_I(inode)->i_es_lock); |
| 475 | err = __es_remove_extent(inode, lblk, end); | 660 | err = __es_remove_extent(inode, lblk, end); |
| 476 | if (err != 0) | 661 | if (err != 0) |
| @@ -669,6 +854,23 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | |||
| 669 | return err; | 854 | return err; |
| 670 | } | 855 | } |
| 671 | 856 | ||
| 857 | int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex) | ||
| 858 | { | ||
| 859 | ext4_lblk_t ee_block; | ||
| 860 | ext4_fsblk_t ee_pblock; | ||
| 861 | unsigned int ee_len; | ||
| 862 | |||
| 863 | ee_block = le32_to_cpu(ex->ee_block); | ||
| 864 | ee_len = ext4_ext_get_actual_len(ex); | ||
| 865 | ee_pblock = ext4_ext_pblock(ex); | ||
| 866 | |||
| 867 | if (ee_len == 0) | ||
| 868 | return 0; | ||
| 869 | |||
| 870 | return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock, | ||
| 871 | EXTENT_STATUS_WRITTEN); | ||
| 872 | } | ||
| 873 | |||
| 672 | static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) | 874 | static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) |
| 673 | { | 875 | { |
| 674 | struct ext4_sb_info *sbi = container_of(shrink, | 876 | struct ext4_sb_info *sbi = container_of(shrink, |
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index f190dfe969da..d8e2d4dc311e 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h | |||
| @@ -21,6 +21,12 @@ | |||
| 21 | #endif | 21 | #endif |
| 22 | 22 | ||
| 23 | /* | 23 | /* |
| 24 | * With ES_AGGRESSIVE_TEST defined, the result of es caching will be | ||
| 25 | * checked against the result of the old map_blocks lookup. | ||
| 26 | */ | ||
| 27 | #define ES_AGGRESSIVE_TEST__ | ||
| 28 | |||
| 29 | /* | ||
| 24 | * These flags live in the high bits of extent_status.es_pblk | 30 | * These flags live in the high bits of extent_status.es_pblk |
| 25 | */ | 31 | */ |
| 26 | #define EXTENT_STATUS_WRITTEN (1ULL << 63) | 32 | #define EXTENT_STATUS_WRITTEN (1ULL << 63) |
| @@ -33,6 +39,8 @@ | |||
| 33 | EXTENT_STATUS_DELAYED | \ | 39 | EXTENT_STATUS_DELAYED | \ |
| 34 | EXTENT_STATUS_HOLE) | 40 | EXTENT_STATUS_HOLE) |
| 35 | 41 | ||
| 42 | struct ext4_extent; | ||
| 43 | |||
| 36 | struct extent_status { | 44 | struct extent_status { |
| 37 | struct rb_node rb_node; | 45 | struct rb_node rb_node; |
| 38 | ext4_lblk_t es_lblk; /* first logical block extent covers */ | 46 | ext4_lblk_t es_lblk; /* first logical block extent covers */ |
| @@ -58,6 +66,7 @@ extern void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk, | |||
| 58 | struct extent_status *es); | 66 | struct extent_status *es); |
| 59 | extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, | 67 | extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, |
| 60 | struct extent_status *es); | 68 | struct extent_status *es); |
| 69 | extern int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex); | ||
| 61 | 70 | ||
| 62 | static inline int ext4_es_is_written(struct extent_status *es) | 71 | static inline int ext4_es_is_written(struct extent_status *es) |
| 63 | { | 72 | { |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 32fd2b9075dd..6c5bb8d993fe 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
| @@ -324,8 +324,8 @@ error_return: | |||
| 324 | } | 324 | } |
| 325 | 325 | ||
| 326 | struct orlov_stats { | 326 | struct orlov_stats { |
| 327 | __u64 free_clusters; | ||
| 327 | __u32 free_inodes; | 328 | __u32 free_inodes; |
| 328 | __u32 free_clusters; | ||
| 329 | __u32 used_dirs; | 329 | __u32 used_dirs; |
| 330 | }; | 330 | }; |
| 331 | 331 | ||
| @@ -342,7 +342,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g, | |||
| 342 | 342 | ||
| 343 | if (flex_size > 1) { | 343 | if (flex_size > 1) { |
| 344 | stats->free_inodes = atomic_read(&flex_group[g].free_inodes); | 344 | stats->free_inodes = atomic_read(&flex_group[g].free_inodes); |
| 345 | stats->free_clusters = atomic_read(&flex_group[g].free_clusters); | 345 | stats->free_clusters = atomic64_read(&flex_group[g].free_clusters); |
| 346 | stats->used_dirs = atomic_read(&flex_group[g].used_dirs); | 346 | stats->used_dirs = atomic_read(&flex_group[g].used_dirs); |
| 347 | return; | 347 | return; |
| 348 | } | 348 | } |
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index b505a145a593..a04183127ef0 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c | |||
| @@ -1539,9 +1539,9 @@ static int free_hole_blocks(handle_t *handle, struct inode *inode, | |||
| 1539 | blk = *i_data; | 1539 | blk = *i_data; |
| 1540 | if (level > 0) { | 1540 | if (level > 0) { |
| 1541 | ext4_lblk_t first2; | 1541 | ext4_lblk_t first2; |
| 1542 | bh = sb_bread(inode->i_sb, blk); | 1542 | bh = sb_bread(inode->i_sb, le32_to_cpu(blk)); |
| 1543 | if (!bh) { | 1543 | if (!bh) { |
| 1544 | EXT4_ERROR_INODE_BLOCK(inode, blk, | 1544 | EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk), |
| 1545 | "Read failure"); | 1545 | "Read failure"); |
| 1546 | return -EIO; | 1546 | return -EIO; |
| 1547 | } | 1547 | } |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9ea0cde3fa9e..b3a5213bc73e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
| @@ -185,8 +185,6 @@ void ext4_evict_inode(struct inode *inode) | |||
| 185 | 185 | ||
| 186 | trace_ext4_evict_inode(inode); | 186 | trace_ext4_evict_inode(inode); |
| 187 | 187 | ||
| 188 | ext4_ioend_wait(inode); | ||
| 189 | |||
| 190 | if (inode->i_nlink) { | 188 | if (inode->i_nlink) { |
| 191 | /* | 189 | /* |
| 192 | * When journalling data dirty buffers are tracked only in the | 190 | * When journalling data dirty buffers are tracked only in the |
| @@ -207,7 +205,8 @@ void ext4_evict_inode(struct inode *inode) | |||
| 207 | * don't use page cache. | 205 | * don't use page cache. |
| 208 | */ | 206 | */ |
| 209 | if (ext4_should_journal_data(inode) && | 207 | if (ext4_should_journal_data(inode) && |
| 210 | (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { | 208 | (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) && |
| 209 | inode->i_ino != EXT4_JOURNAL_INO) { | ||
| 211 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | 210 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; |
| 212 | tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; | 211 | tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; |
| 213 | 212 | ||
| @@ -216,6 +215,7 @@ void ext4_evict_inode(struct inode *inode) | |||
| 216 | filemap_write_and_wait(&inode->i_data); | 215 | filemap_write_and_wait(&inode->i_data); |
| 217 | } | 216 | } |
| 218 | truncate_inode_pages(&inode->i_data, 0); | 217 | truncate_inode_pages(&inode->i_data, 0); |
| 218 | ext4_ioend_shutdown(inode); | ||
| 219 | goto no_delete; | 219 | goto no_delete; |
| 220 | } | 220 | } |
| 221 | 221 | ||
| @@ -225,6 +225,7 @@ void ext4_evict_inode(struct inode *inode) | |||
| 225 | if (ext4_should_order_data(inode)) | 225 | if (ext4_should_order_data(inode)) |
| 226 | ext4_begin_ordered_truncate(inode, 0); | 226 | ext4_begin_ordered_truncate(inode, 0); |
| 227 | truncate_inode_pages(&inode->i_data, 0); | 227 | truncate_inode_pages(&inode->i_data, 0); |
| 228 | ext4_ioend_shutdown(inode); | ||
| 228 | 229 | ||
| 229 | if (is_bad_inode(inode)) | 230 | if (is_bad_inode(inode)) |
| 230 | goto no_delete; | 231 | goto no_delete; |
| @@ -482,6 +483,58 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | |||
| 482 | return num; | 483 | return num; |
| 483 | } | 484 | } |
| 484 | 485 | ||
| 486 | #ifdef ES_AGGRESSIVE_TEST | ||
| 487 | static void ext4_map_blocks_es_recheck(handle_t *handle, | ||
| 488 | struct inode *inode, | ||
| 489 | struct ext4_map_blocks *es_map, | ||
| 490 | struct ext4_map_blocks *map, | ||
| 491 | int flags) | ||
| 492 | { | ||
| 493 | int retval; | ||
| 494 | |||
| 495 | map->m_flags = 0; | ||
| 496 | /* | ||
| 497 | * There is a race window in which the results may differ, | ||
| 498 | * e.g. xfstests #223 when dioread_nolock is enabled. The reason | ||
| 499 | * is that we look up a block mapping in the extent status tree | ||
| 500 | * without taking i_data_sem, so in the meantime the unwritten | ||
| 501 | * extent could have been converted. | ||
| 502 | */ | ||
| 503 | if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) | ||
| 504 | down_read((&EXT4_I(inode)->i_data_sem)); | ||
| 505 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | ||
| 506 | retval = ext4_ext_map_blocks(handle, inode, map, flags & | ||
| 507 | EXT4_GET_BLOCKS_KEEP_SIZE); | ||
| 508 | } else { | ||
| 509 | retval = ext4_ind_map_blocks(handle, inode, map, flags & | ||
| 510 | EXT4_GET_BLOCKS_KEEP_SIZE); | ||
| 511 | } | ||
| 512 | if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) | ||
| 513 | up_read((&EXT4_I(inode)->i_data_sem)); | ||
| 514 | /* | ||
| 515 | * Clear the EXT4_MAP_FROM_CLUSTER and EXT4_MAP_BOUNDARY flags | ||
| 516 | * because they shouldn't be set in es_map->m_flags. | ||
| 517 | */ | ||
| 518 | map->m_flags &= ~(EXT4_MAP_FROM_CLUSTER | EXT4_MAP_BOUNDARY); | ||
| 519 | |||
| 520 | /* | ||
| 521 | * We don't check m_len because extents will be collapsed in the | ||
| 522 | * status tree, so m_len might not be equal. | ||
| 523 | */ | ||
| 524 | if (es_map->m_lblk != map->m_lblk || | ||
| 525 | es_map->m_flags != map->m_flags || | ||
| 526 | es_map->m_pblk != map->m_pblk) { | ||
| 527 | printk("ES cache assertation failed for inode: %lu " | ||
| 528 | "es_cached ex [%d/%d/%llu/%x] != " | ||
| 529 | "found ex [%d/%d/%llu/%x] retval %d flags %x\n", | ||
| 530 | inode->i_ino, es_map->m_lblk, es_map->m_len, | ||
| 531 | es_map->m_pblk, es_map->m_flags, map->m_lblk, | ||
| 532 | map->m_len, map->m_pblk, map->m_flags, | ||
| 533 | retval, flags); | ||
| 534 | } | ||
| 535 | } | ||
| 536 | #endif /* ES_AGGRESSIVE_TEST */ | ||
| 537 | |||
| 485 | /* | 538 | /* |
| 486 | * The ext4_map_blocks() function tries to look up the requested blocks, | 539 | * The ext4_map_blocks() function tries to look up the requested blocks, |
| 487 | * and returns if the blocks are already mapped. | 540 | * and returns if the blocks are already mapped. |
| @@ -509,6 +562,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
| 509 | { | 562 | { |
| 510 | struct extent_status es; | 563 | struct extent_status es; |
| 511 | int retval; | 564 | int retval; |
| 565 | #ifdef ES_AGGRESSIVE_TEST | ||
| 566 | struct ext4_map_blocks orig_map; | ||
| 567 | |||
| 568 | memcpy(&orig_map, map, sizeof(*map)); | ||
| 569 | #endif | ||
| 512 | 570 | ||
| 513 | map->m_flags = 0; | 571 | map->m_flags = 0; |
| 514 | ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u," | 572 | ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u," |
| @@ -531,6 +589,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
| 531 | } else { | 589 | } else { |
| 532 | BUG_ON(1); | 590 | BUG_ON(1); |
| 533 | } | 591 | } |
| 592 | #ifdef ES_AGGRESSIVE_TEST | ||
| 593 | ext4_map_blocks_es_recheck(handle, inode, map, | ||
| 594 | &orig_map, flags); | ||
| 595 | #endif | ||
| 534 | goto found; | 596 | goto found; |
| 535 | } | 597 | } |
| 536 | 598 | ||
| @@ -551,6 +613,15 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
| 551 | int ret; | 613 | int ret; |
| 552 | unsigned long long status; | 614 | unsigned long long status; |
| 553 | 615 | ||
| 616 | #ifdef ES_AGGRESSIVE_TEST | ||
| 617 | if (retval != map->m_len) { | ||
| 618 | printk("ES len assertation failed for inode: %lu " | ||
| 619 | "retval %d != map->m_len %d " | ||
| 620 | "in %s (lookup)\n", inode->i_ino, retval, | ||
| 621 | map->m_len, __func__); | ||
| 622 | } | ||
| 623 | #endif | ||
| 624 | |||
| 554 | status = map->m_flags & EXT4_MAP_UNWRITTEN ? | 625 | status = map->m_flags & EXT4_MAP_UNWRITTEN ? |
| 555 | EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; | 626 | EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; |
| 556 | if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && | 627 | if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && |
| @@ -643,6 +714,24 @@ found: | |||
| 643 | int ret; | 714 | int ret; |
| 644 | unsigned long long status; | 715 | unsigned long long status; |
| 645 | 716 | ||
| 717 | #ifdef ES_AGGRESSIVE_TEST | ||
| 718 | if (retval != map->m_len) { | ||
| 719 | printk("ES len assertation failed for inode: %lu " | ||
| 720 | "retval %d != map->m_len %d " | ||
| 721 | "in %s (allocation)\n", inode->i_ino, retval, | ||
| 722 | map->m_len, __func__); | ||
| 723 | } | ||
| 724 | #endif | ||
| 725 | |||
| 726 | /* | ||
| 727 | * If the extent has been zeroed out, we don't need to update | ||
| 728 | * extent status tree. | ||
| 729 | */ | ||
| 730 | if ((flags & EXT4_GET_BLOCKS_PRE_IO) && | ||
| 731 | ext4_es_lookup_extent(inode, map->m_lblk, &es)) { | ||
| 732 | if (ext4_es_is_written(&es)) | ||
| 733 | goto has_zeroout; | ||
| 734 | } | ||
| 646 | status = map->m_flags & EXT4_MAP_UNWRITTEN ? | 735 | status = map->m_flags & EXT4_MAP_UNWRITTEN ? |
| 647 | EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; | 736 | EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; |
| 648 | if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && | 737 | if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && |
| @@ -655,6 +744,7 @@ found: | |||
| 655 | retval = ret; | 744 | retval = ret; |
| 656 | } | 745 | } |
| 657 | 746 | ||
| 747 | has_zeroout: | ||
| 658 | up_write((&EXT4_I(inode)->i_data_sem)); | 748 | up_write((&EXT4_I(inode)->i_data_sem)); |
| 659 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 749 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
| 660 | int ret = check_block_validity(inode, map); | 750 | int ret = check_block_validity(inode, map); |
| @@ -1216,6 +1306,55 @@ static int ext4_journalled_write_end(struct file *file, | |||
| 1216 | } | 1306 | } |
| 1217 | 1307 | ||
| 1218 | /* | 1308 | /* |
| 1309 | * Reserve metadata for a single block located at lblock | ||
| 1310 | */ | ||
| 1311 | static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock) | ||
| 1312 | { | ||
| 1313 | int retries = 0; | ||
| 1314 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
| 1315 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
| 1316 | unsigned int md_needed; | ||
| 1317 | ext4_lblk_t save_last_lblock; | ||
| 1318 | int save_len; | ||
| 1319 | |||
| 1320 | /* | ||
| 1321 | * recalculate the number of metadata blocks to reserve | ||
| 1322 | * in order to allocate the block at lblock; | ||
| 1323 | * worst case is one extent per block | ||
| 1324 | */ | ||
| 1325 | repeat: | ||
| 1326 | spin_lock(&ei->i_block_reservation_lock); | ||
| 1327 | /* | ||
| 1328 | * ext4_calc_metadata_amount() has side effects, which we have | ||
| 1329 | * to be prepared to undo if we fail to claim space. | ||
| 1330 | */ | ||
| 1331 | save_len = ei->i_da_metadata_calc_len; | ||
| 1332 | save_last_lblock = ei->i_da_metadata_calc_last_lblock; | ||
| 1333 | md_needed = EXT4_NUM_B2C(sbi, | ||
| 1334 | ext4_calc_metadata_amount(inode, lblock)); | ||
| 1335 | trace_ext4_da_reserve_space(inode, md_needed); | ||
| 1336 | |||
| 1337 | /* | ||
| 1338 | * We do still charge estimated metadata to the sb though; | ||
| 1339 | * we cannot afford to run out of free blocks. | ||
| 1340 | */ | ||
| 1341 | if (ext4_claim_free_clusters(sbi, md_needed, 0)) { | ||
| 1342 | ei->i_da_metadata_calc_len = save_len; | ||
| 1343 | ei->i_da_metadata_calc_last_lblock = save_last_lblock; | ||
| 1344 | spin_unlock(&ei->i_block_reservation_lock); | ||
| 1345 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | ||
| 1346 | cond_resched(); | ||
| 1347 | goto repeat; | ||
| 1348 | } | ||
| 1349 | return -ENOSPC; | ||
| 1350 | } | ||
| 1351 | ei->i_reserved_meta_blocks += md_needed; | ||
| 1352 | spin_unlock(&ei->i_block_reservation_lock); | ||
| 1353 | |||
| 1354 | return 0; /* success */ | ||
| 1355 | } | ||
| 1356 | |||
| 1357 | /* | ||
| 1219 | * Reserve a single cluster located at lblock | 1358 | * Reserve a single cluster located at lblock |
| 1220 | */ | 1359 | */ |
| 1221 | static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) | 1360 | static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) |
| @@ -1263,7 +1402,7 @@ repeat: | |||
| 1263 | ei->i_da_metadata_calc_last_lblock = save_last_lblock; | 1402 | ei->i_da_metadata_calc_last_lblock = save_last_lblock; |
| 1264 | spin_unlock(&ei->i_block_reservation_lock); | 1403 | spin_unlock(&ei->i_block_reservation_lock); |
| 1265 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | 1404 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { |
| 1266 | yield(); | 1405 | cond_resched(); |
| 1267 | goto repeat; | 1406 | goto repeat; |
| 1268 | } | 1407 | } |
| 1269 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); | 1408 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); |
| @@ -1768,6 +1907,11 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, | |||
| 1768 | struct extent_status es; | 1907 | struct extent_status es; |
| 1769 | int retval; | 1908 | int retval; |
| 1770 | sector_t invalid_block = ~((sector_t) 0xffff); | 1909 | sector_t invalid_block = ~((sector_t) 0xffff); |
| 1910 | #ifdef ES_AGGRESSIVE_TEST | ||
| 1911 | struct ext4_map_blocks orig_map; | ||
| 1912 | |||
| 1913 | memcpy(&orig_map, map, sizeof(*map)); | ||
| 1914 | #endif | ||
| 1771 | 1915 | ||
| 1772 | if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es)) | 1916 | if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es)) |
| 1773 | invalid_block = ~0; | 1917 | invalid_block = ~0; |
| @@ -1809,6 +1953,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, | |||
| 1809 | else | 1953 | else |
| 1810 | BUG_ON(1); | 1954 | BUG_ON(1); |
| 1811 | 1955 | ||
| 1956 | #ifdef ES_AGGRESSIVE_TEST | ||
| 1957 | ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0); | ||
| 1958 | #endif | ||
| 1812 | return retval; | 1959 | return retval; |
| 1813 | } | 1960 | } |
| 1814 | 1961 | ||
| @@ -1843,8 +1990,11 @@ add_delayed: | |||
| 1843 | * XXX: __block_prepare_write() unmaps passed block, | 1990 | * XXX: __block_prepare_write() unmaps passed block, |
| 1844 | * is it OK? | 1991 | * is it OK? |
| 1845 | */ | 1992 | */ |
| 1846 | /* If the block was allocated from previously allocated cluster, | 1993 | /* |
| 1847 | * then we dont need to reserve it again. */ | 1994 | * If the block was allocated from previously allocated cluster, |
| 1995 | * then we don't need to reserve it again. However we still need | ||
| 1996 | * to reserve metadata for every block we're going to write. | ||
| 1997 | */ | ||
| 1848 | if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) { | 1998 | if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) { |
| 1849 | ret = ext4_da_reserve_space(inode, iblock); | 1999 | ret = ext4_da_reserve_space(inode, iblock); |
| 1850 | if (ret) { | 2000 | if (ret) { |
| @@ -1852,6 +2002,13 @@ add_delayed: | |||
| 1852 | retval = ret; | 2002 | retval = ret; |
| 1853 | goto out_unlock; | 2003 | goto out_unlock; |
| 1854 | } | 2004 | } |
| 2005 | } else { | ||
| 2006 | ret = ext4_da_reserve_metadata(inode, iblock); | ||
| 2007 | if (ret) { | ||
| 2008 | /* not enough space to reserve */ | ||
| 2009 | retval = ret; | ||
| 2010 | goto out_unlock; | ||
| 2011 | } | ||
| 1855 | } | 2012 | } |
| 1856 | 2013 | ||
| 1857 | ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, | 2014 | ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, |
| @@ -1873,6 +2030,15 @@ add_delayed: | |||
| 1873 | int ret; | 2030 | int ret; |
| 1874 | unsigned long long status; | 2031 | unsigned long long status; |
| 1875 | 2032 | ||
| 2033 | #ifdef ES_AGGRESSIVE_TEST | ||
| 2034 | if (retval != map->m_len) { | ||
| 2035 | printk("ES len assertation failed for inode: %lu " | ||
| 2036 | "retval %d != map->m_len %d " | ||
| 2037 | "in %s (lookup)\n", inode->i_ino, retval, | ||
| 2038 | map->m_len, __func__); | ||
| 2039 | } | ||
| 2040 | #endif | ||
| 2041 | |||
| 1876 | status = map->m_flags & EXT4_MAP_UNWRITTEN ? | 2042 | status = map->m_flags & EXT4_MAP_UNWRITTEN ? |
| 1877 | EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; | 2043 | EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; |
| 1878 | ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, | 2044 | ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, |
| @@ -2908,8 +3074,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait) | |||
| 2908 | 3074 | ||
| 2909 | trace_ext4_releasepage(page); | 3075 | trace_ext4_releasepage(page); |
| 2910 | 3076 | ||
| 2911 | WARN_ON(PageChecked(page)); | 3077 | /* Page has dirty journalled data -> cannot release */ |
| 2912 | if (!page_has_buffers(page)) | 3078 | if (PageChecked(page)) |
| 2913 | return 0; | 3079 | return 0; |
| 2914 | if (journal) | 3080 | if (journal) |
| 2915 | return jbd2_journal_try_to_free_buffers(journal, page, wait); | 3081 | return jbd2_journal_try_to_free_buffers(journal, page, wait); |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 7bb713a46fe4..ee6614bdb639 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
| @@ -2804,8 +2804,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
| 2804 | if (sbi->s_log_groups_per_flex) { | 2804 | if (sbi->s_log_groups_per_flex) { |
| 2805 | ext4_group_t flex_group = ext4_flex_group(sbi, | 2805 | ext4_group_t flex_group = ext4_flex_group(sbi, |
| 2806 | ac->ac_b_ex.fe_group); | 2806 | ac->ac_b_ex.fe_group); |
| 2807 | atomic_sub(ac->ac_b_ex.fe_len, | 2807 | atomic64_sub(ac->ac_b_ex.fe_len, |
| 2808 | &sbi->s_flex_groups[flex_group].free_clusters); | 2808 | &sbi->s_flex_groups[flex_group].free_clusters); |
| 2809 | } | 2809 | } |
| 2810 | 2810 | ||
| 2811 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); | 2811 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); |
| @@ -3692,11 +3692,7 @@ repeat: | |||
| 3692 | if (free < needed && busy) { | 3692 | if (free < needed && busy) { |
| 3693 | busy = 0; | 3693 | busy = 0; |
| 3694 | ext4_unlock_group(sb, group); | 3694 | ext4_unlock_group(sb, group); |
| 3695 | /* | 3695 | cond_resched(); |
| 3696 | * Yield the CPU here so that we don't get soft lockup | ||
| 3697 | * in non preempt case. | ||
| 3698 | */ | ||
| 3699 | yield(); | ||
| 3700 | goto repeat; | 3696 | goto repeat; |
| 3701 | } | 3697 | } |
| 3702 | 3698 | ||
| @@ -4246,7 +4242,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
| 4246 | ext4_claim_free_clusters(sbi, ar->len, ar->flags)) { | 4242 | ext4_claim_free_clusters(sbi, ar->len, ar->flags)) { |
| 4247 | 4243 | ||
| 4248 | /* let others to free the space */ | 4244 | /* let others to free the space */ |
| 4249 | yield(); | 4245 | cond_resched(); |
| 4250 | ar->len = ar->len >> 1; | 4246 | ar->len = ar->len >> 1; |
| 4251 | } | 4247 | } |
| 4252 | if (!ar->len) { | 4248 | if (!ar->len) { |
| @@ -4464,7 +4460,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
| 4464 | struct buffer_head *bitmap_bh = NULL; | 4460 | struct buffer_head *bitmap_bh = NULL; |
| 4465 | struct super_block *sb = inode->i_sb; | 4461 | struct super_block *sb = inode->i_sb; |
| 4466 | struct ext4_group_desc *gdp; | 4462 | struct ext4_group_desc *gdp; |
| 4467 | unsigned long freed = 0; | ||
| 4468 | unsigned int overflow; | 4463 | unsigned int overflow; |
| 4469 | ext4_grpblk_t bit; | 4464 | ext4_grpblk_t bit; |
| 4470 | struct buffer_head *gd_bh; | 4465 | struct buffer_head *gd_bh; |
| @@ -4666,14 +4661,12 @@ do_more: | |||
| 4666 | 4661 | ||
| 4667 | if (sbi->s_log_groups_per_flex) { | 4662 | if (sbi->s_log_groups_per_flex) { |
| 4668 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); | 4663 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); |
| 4669 | atomic_add(count_clusters, | 4664 | atomic64_add(count_clusters, |
| 4670 | &sbi->s_flex_groups[flex_group].free_clusters); | 4665 | &sbi->s_flex_groups[flex_group].free_clusters); |
| 4671 | } | 4666 | } |
| 4672 | 4667 | ||
| 4673 | ext4_mb_unload_buddy(&e4b); | 4668 | ext4_mb_unload_buddy(&e4b); |
| 4674 | 4669 | ||
| 4675 | freed += count; | ||
| 4676 | |||
| 4677 | if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) | 4670 | if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) |
| 4678 | dquot_free_block(inode, EXT4_C2B(sbi, count_clusters)); | 4671 | dquot_free_block(inode, EXT4_C2B(sbi, count_clusters)); |
| 4679 | 4672 | ||
| @@ -4811,8 +4804,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, | |||
| 4811 | 4804 | ||
| 4812 | if (sbi->s_log_groups_per_flex) { | 4805 | if (sbi->s_log_groups_per_flex) { |
| 4813 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); | 4806 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); |
| 4814 | atomic_add(EXT4_NUM_B2C(sbi, blocks_freed), | 4807 | atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed), |
| 4815 | &sbi->s_flex_groups[flex_group].free_clusters); | 4808 | &sbi->s_flex_groups[flex_group].free_clusters); |
| 4816 | } | 4809 | } |
| 4817 | 4810 | ||
| 4818 | ext4_mb_unload_buddy(&e4b); | 4811 | ext4_mb_unload_buddy(&e4b); |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 4e81d47aa8cb..33e1c086858b 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
| @@ -32,16 +32,18 @@ | |||
| 32 | */ | 32 | */ |
| 33 | static inline int | 33 | static inline int |
| 34 | get_ext_path(struct inode *inode, ext4_lblk_t lblock, | 34 | get_ext_path(struct inode *inode, ext4_lblk_t lblock, |
| 35 | struct ext4_ext_path **path) | 35 | struct ext4_ext_path **orig_path) |
| 36 | { | 36 | { |
| 37 | int ret = 0; | 37 | int ret = 0; |
| 38 | struct ext4_ext_path *path; | ||
| 38 | 39 | ||
| 39 | *path = ext4_ext_find_extent(inode, lblock, *path); | 40 | path = ext4_ext_find_extent(inode, lblock, *orig_path); |
| 40 | if (IS_ERR(*path)) { | 41 | if (IS_ERR(path)) |
| 41 | ret = PTR_ERR(*path); | 42 | ret = PTR_ERR(path); |
| 42 | *path = NULL; | 43 | else if (path[ext_depth(inode)].p_ext == NULL) |
| 43 | } else if ((*path)[ext_depth(inode)].p_ext == NULL) | ||
| 44 | ret = -ENODATA; | 44 | ret = -ENODATA; |
| 45 | else | ||
| 46 | *orig_path = path; | ||
| 45 | 47 | ||
| 46 | return ret; | 48 | return ret; |
| 47 | } | 49 | } |
| @@ -611,24 +613,25 @@ mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count, | |||
| 611 | { | 613 | { |
| 612 | struct ext4_ext_path *path = NULL; | 614 | struct ext4_ext_path *path = NULL; |
| 613 | struct ext4_extent *ext; | 615 | struct ext4_extent *ext; |
| 616 | int ret = 0; | ||
| 614 | ext4_lblk_t last = from + count; | 617 | ext4_lblk_t last = from + count; |
| 615 | while (from < last) { | 618 | while (from < last) { |
| 616 | *err = get_ext_path(inode, from, &path); | 619 | *err = get_ext_path(inode, from, &path); |
| 617 | if (*err) | 620 | if (*err) |
| 618 | return 0; | 621 | goto out; |
| 619 | ext = path[ext_depth(inode)].p_ext; | 622 | ext = path[ext_depth(inode)].p_ext; |
| 620 | if (!ext) { | 623 | if (uninit != ext4_ext_is_uninitialized(ext)) |
| 621 | ext4_ext_drop_refs(path); | 624 | goto out; |
| 622 | return 0; | ||
| 623 | } | ||
| 624 | if (uninit != ext4_ext_is_uninitialized(ext)) { | ||
| 625 | ext4_ext_drop_refs(path); | ||
| 626 | return 0; | ||
| 627 | } | ||
| 628 | from += ext4_ext_get_actual_len(ext); | 625 | from += ext4_ext_get_actual_len(ext); |
| 629 | ext4_ext_drop_refs(path); | 626 | ext4_ext_drop_refs(path); |
| 630 | } | 627 | } |
| 631 | return 1; | 628 | ret = 1; |
| 629 | out: | ||
| 630 | if (path) { | ||
| 631 | ext4_ext_drop_refs(path); | ||
| 632 | kfree(path); | ||
| 633 | } | ||
| 634 | return ret; | ||
| 632 | } | 635 | } |
| 633 | 636 | ||
| 634 | /** | 637 | /** |
| @@ -666,6 +669,14 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
| 666 | int replaced_count = 0; | 669 | int replaced_count = 0; |
| 667 | int dext_alen; | 670 | int dext_alen; |
| 668 | 671 | ||
| 672 | *err = ext4_es_remove_extent(orig_inode, from, count); | ||
| 673 | if (*err) | ||
| 674 | goto out; | ||
| 675 | |||
| 676 | *err = ext4_es_remove_extent(donor_inode, from, count); | ||
| 677 | if (*err) | ||
| 678 | goto out; | ||
| 679 | |||
| 669 | /* Get the original extent for the block "orig_off" */ | 680 | /* Get the original extent for the block "orig_off" */ |
| 670 | *err = get_ext_path(orig_inode, orig_off, &orig_path); | 681 | *err = get_ext_path(orig_inode, orig_off, &orig_path); |
| 671 | if (*err) | 682 | if (*err) |
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 809b31003ecc..047a6de04a0a 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
| @@ -50,11 +50,21 @@ void ext4_exit_pageio(void) | |||
| 50 | kmem_cache_destroy(io_page_cachep); | 50 | kmem_cache_destroy(io_page_cachep); |
| 51 | } | 51 | } |
| 52 | 52 | ||
| 53 | void ext4_ioend_wait(struct inode *inode) | 53 | /* |
| 54 | * This function is called by ext4_evict_inode() to make sure there is | ||
| 55 | * no more pending I/O completion work left to do. | ||
| 56 | */ | ||
| 57 | void ext4_ioend_shutdown(struct inode *inode) | ||
| 54 | { | 58 | { |
| 55 | wait_queue_head_t *wq = ext4_ioend_wq(inode); | 59 | wait_queue_head_t *wq = ext4_ioend_wq(inode); |
| 56 | 60 | ||
| 57 | wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); | 61 | wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); |
| 62 | /* | ||
| 63 | * We need to make sure the work structure is finished being | ||
| 64 | * used before we let the inode get destroyed. | ||
| 65 | */ | ||
| 66 | if (work_pending(&EXT4_I(inode)->i_unwritten_work)) | ||
| 67 | cancel_work_sync(&EXT4_I(inode)->i_unwritten_work); | ||
| 58 | } | 68 | } |
| 59 | 69 | ||
| 60 | static void put_io_page(struct ext4_io_page *io_page) | 70 | static void put_io_page(struct ext4_io_page *io_page) |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index b2c8ee56eb98..c169477a62c9 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
| @@ -1360,8 +1360,8 @@ static void ext4_update_super(struct super_block *sb, | |||
| 1360 | sbi->s_log_groups_per_flex) { | 1360 | sbi->s_log_groups_per_flex) { |
| 1361 | ext4_group_t flex_group; | 1361 | ext4_group_t flex_group; |
| 1362 | flex_group = ext4_flex_group(sbi, group_data[0].group); | 1362 | flex_group = ext4_flex_group(sbi, group_data[0].group); |
| 1363 | atomic_add(EXT4_NUM_B2C(sbi, free_blocks), | 1363 | atomic64_add(EXT4_NUM_B2C(sbi, free_blocks), |
| 1364 | &sbi->s_flex_groups[flex_group].free_clusters); | 1364 | &sbi->s_flex_groups[flex_group].free_clusters); |
| 1365 | atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, | 1365 | atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, |
| 1366 | &sbi->s_flex_groups[flex_group].free_inodes); | 1366 | &sbi->s_flex_groups[flex_group].free_inodes); |
| 1367 | } | 1367 | } |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b3818b48f418..5d6d53578124 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
| @@ -1927,8 +1927,8 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
| 1927 | flex_group = ext4_flex_group(sbi, i); | 1927 | flex_group = ext4_flex_group(sbi, i); |
| 1928 | atomic_add(ext4_free_inodes_count(sb, gdp), | 1928 | atomic_add(ext4_free_inodes_count(sb, gdp), |
| 1929 | &sbi->s_flex_groups[flex_group].free_inodes); | 1929 | &sbi->s_flex_groups[flex_group].free_inodes); |
| 1930 | atomic_add(ext4_free_group_clusters(sb, gdp), | 1930 | atomic64_add(ext4_free_group_clusters(sb, gdp), |
| 1931 | &sbi->s_flex_groups[flex_group].free_clusters); | 1931 | &sbi->s_flex_groups[flex_group].free_clusters); |
| 1932 | atomic_add(ext4_used_dirs_count(sb, gdp), | 1932 | atomic_add(ext4_used_dirs_count(sb, gdp), |
| 1933 | &sbi->s_flex_groups[flex_group].used_dirs); | 1933 | &sbi->s_flex_groups[flex_group].used_dirs); |
| 1934 | } | 1934 | } |
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 019f45e45097..d79c2dadc536 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c | |||
| @@ -923,8 +923,11 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) | |||
| 923 | cmd = F_SETLK; | 923 | cmd = F_SETLK; |
| 924 | fl->fl_type = F_UNLCK; | 924 | fl->fl_type = F_UNLCK; |
| 925 | } | 925 | } |
| 926 | if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | 926 | if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { |
| 927 | if (fl->fl_type == F_UNLCK) | ||
| 928 | posix_lock_file_wait(file, fl); | ||
| 927 | return -EIO; | 929 | return -EIO; |
| 930 | } | ||
| 928 | if (IS_GETLK(cmd)) | 931 | if (IS_GETLK(cmd)) |
| 929 | return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl); | 932 | return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl); |
| 930 | else if (fl->fl_type == F_UNLCK) | 933 | else if (fl->fl_type == F_UNLCK) |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 156e42ec84ea..5c29216e9cc1 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
| @@ -588,6 +588,7 @@ struct lm_lockstruct { | |||
| 588 | struct dlm_lksb ls_control_lksb; /* control_lock */ | 588 | struct dlm_lksb ls_control_lksb; /* control_lock */ |
| 589 | char ls_control_lvb[GDLM_LVB_SIZE]; /* control_lock lvb */ | 589 | char ls_control_lvb[GDLM_LVB_SIZE]; /* control_lock lvb */ |
| 590 | struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */ | 590 | struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */ |
| 591 | char *ls_lvb_bits; | ||
| 591 | 592 | ||
| 592 | spinlock_t ls_recover_spin; /* protects following fields */ | 593 | spinlock_t ls_recover_spin; /* protects following fields */ |
| 593 | unsigned long ls_recover_flags; /* DFL_ */ | 594 | unsigned long ls_recover_flags; /* DFL_ */ |
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 9802de0f85e6..c8423d6de6c3 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c | |||
| @@ -483,12 +483,8 @@ static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen, | |||
| 483 | 483 | ||
| 484 | static int all_jid_bits_clear(char *lvb) | 484 | static int all_jid_bits_clear(char *lvb) |
| 485 | { | 485 | { |
| 486 | int i; | 486 | return !memchr_inv(lvb + JID_BITMAP_OFFSET, 0, |
| 487 | for (i = JID_BITMAP_OFFSET; i < GDLM_LVB_SIZE; i++) { | 487 | GDLM_LVB_SIZE - JID_BITMAP_OFFSET); |
| 488 | if (lvb[i]) | ||
| 489 | return 0; | ||
| 490 | } | ||
| 491 | return 1; | ||
| 492 | } | 488 | } |
| 493 | 489 | ||
| 494 | static void sync_wait_cb(void *arg) | 490 | static void sync_wait_cb(void *arg) |
| @@ -580,7 +576,6 @@ static void gfs2_control_func(struct work_struct *work) | |||
| 580 | { | 576 | { |
| 581 | struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work); | 577 | struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work); |
| 582 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | 578 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; |
| 583 | char lvb_bits[GDLM_LVB_SIZE]; | ||
| 584 | uint32_t block_gen, start_gen, lvb_gen, flags; | 579 | uint32_t block_gen, start_gen, lvb_gen, flags; |
| 585 | int recover_set = 0; | 580 | int recover_set = 0; |
| 586 | int write_lvb = 0; | 581 | int write_lvb = 0; |
| @@ -634,7 +629,7 @@ static void gfs2_control_func(struct work_struct *work) | |||
| 634 | return; | 629 | return; |
| 635 | } | 630 | } |
| 636 | 631 | ||
| 637 | control_lvb_read(ls, &lvb_gen, lvb_bits); | 632 | control_lvb_read(ls, &lvb_gen, ls->ls_lvb_bits); |
| 638 | 633 | ||
| 639 | spin_lock(&ls->ls_recover_spin); | 634 | spin_lock(&ls->ls_recover_spin); |
| 640 | if (block_gen != ls->ls_recover_block || | 635 | if (block_gen != ls->ls_recover_block || |
| @@ -664,10 +659,10 @@ static void gfs2_control_func(struct work_struct *work) | |||
| 664 | 659 | ||
| 665 | ls->ls_recover_result[i] = 0; | 660 | ls->ls_recover_result[i] = 0; |
| 666 | 661 | ||
| 667 | if (!test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET)) | 662 | if (!test_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET)) |
| 668 | continue; | 663 | continue; |
| 669 | 664 | ||
| 670 | __clear_bit_le(i, lvb_bits + JID_BITMAP_OFFSET); | 665 | __clear_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET); |
| 671 | write_lvb = 1; | 666 | write_lvb = 1; |
| 672 | } | 667 | } |
| 673 | } | 668 | } |
| @@ -691,7 +686,7 @@ static void gfs2_control_func(struct work_struct *work) | |||
| 691 | continue; | 686 | continue; |
| 692 | if (ls->ls_recover_submit[i] < start_gen) { | 687 | if (ls->ls_recover_submit[i] < start_gen) { |
| 693 | ls->ls_recover_submit[i] = 0; | 688 | ls->ls_recover_submit[i] = 0; |
| 694 | __set_bit_le(i, lvb_bits + JID_BITMAP_OFFSET); | 689 | __set_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET); |
| 695 | } | 690 | } |
| 696 | } | 691 | } |
| 697 | /* even if there are no bits to set, we need to write the | 692 | /* even if there are no bits to set, we need to write the |
| @@ -705,7 +700,7 @@ static void gfs2_control_func(struct work_struct *work) | |||
| 705 | spin_unlock(&ls->ls_recover_spin); | 700 | spin_unlock(&ls->ls_recover_spin); |
| 706 | 701 | ||
| 707 | if (write_lvb) { | 702 | if (write_lvb) { |
| 708 | control_lvb_write(ls, start_gen, lvb_bits); | 703 | control_lvb_write(ls, start_gen, ls->ls_lvb_bits); |
| 709 | flags = DLM_LKF_CONVERT | DLM_LKF_VALBLK; | 704 | flags = DLM_LKF_CONVERT | DLM_LKF_VALBLK; |
| 710 | } else { | 705 | } else { |
| 711 | flags = DLM_LKF_CONVERT; | 706 | flags = DLM_LKF_CONVERT; |
| @@ -725,7 +720,7 @@ static void gfs2_control_func(struct work_struct *work) | |||
| 725 | */ | 720 | */ |
| 726 | 721 | ||
| 727 | for (i = 0; i < recover_size; i++) { | 722 | for (i = 0; i < recover_size; i++) { |
| 728 | if (test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET)) { | 723 | if (test_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET)) { |
| 729 | fs_info(sdp, "recover generation %u jid %d\n", | 724 | fs_info(sdp, "recover generation %u jid %d\n", |
| 730 | start_gen, i); | 725 | start_gen, i); |
| 731 | gfs2_recover_set(sdp, i); | 726 | gfs2_recover_set(sdp, i); |
| @@ -758,7 +753,6 @@ static void gfs2_control_func(struct work_struct *work) | |||
| 758 | static int control_mount(struct gfs2_sbd *sdp) | 753 | static int control_mount(struct gfs2_sbd *sdp) |
| 759 | { | 754 | { |
| 760 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | 755 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; |
| 761 | char lvb_bits[GDLM_LVB_SIZE]; | ||
| 762 | uint32_t start_gen, block_gen, mount_gen, lvb_gen; | 756 | uint32_t start_gen, block_gen, mount_gen, lvb_gen; |
| 763 | int mounted_mode; | 757 | int mounted_mode; |
| 764 | int retries = 0; | 758 | int retries = 0; |
| @@ -857,7 +851,7 @@ locks_done: | |||
| 857 | * lvb_gen will be non-zero. | 851 | * lvb_gen will be non-zero. |
| 858 | */ | 852 | */ |
| 859 | 853 | ||
| 860 | control_lvb_read(ls, &lvb_gen, lvb_bits); | 854 | control_lvb_read(ls, &lvb_gen, ls->ls_lvb_bits); |
| 861 | 855 | ||
| 862 | if (lvb_gen == 0xFFFFFFFF) { | 856 | if (lvb_gen == 0xFFFFFFFF) { |
| 863 | /* special value to force mount attempts to fail */ | 857 | /* special value to force mount attempts to fail */ |
| @@ -887,7 +881,7 @@ locks_done: | |||
| 887 | * and all lvb bits to be clear (no pending journal recoveries.) | 881 | * and all lvb bits to be clear (no pending journal recoveries.) |
| 888 | */ | 882 | */ |
| 889 | 883 | ||
| 890 | if (!all_jid_bits_clear(lvb_bits)) { | 884 | if (!all_jid_bits_clear(ls->ls_lvb_bits)) { |
| 891 | /* journals need recovery, wait until all are clear */ | 885 | /* journals need recovery, wait until all are clear */ |
| 892 | fs_info(sdp, "control_mount wait for journal recovery\n"); | 886 | fs_info(sdp, "control_mount wait for journal recovery\n"); |
| 893 | goto restart; | 887 | goto restart; |
| @@ -949,7 +943,6 @@ static int dlm_recovery_wait(void *word) | |||
| 949 | static int control_first_done(struct gfs2_sbd *sdp) | 943 | static int control_first_done(struct gfs2_sbd *sdp) |
| 950 | { | 944 | { |
| 951 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | 945 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; |
| 952 | char lvb_bits[GDLM_LVB_SIZE]; | ||
| 953 | uint32_t start_gen, block_gen; | 946 | uint32_t start_gen, block_gen; |
| 954 | int error; | 947 | int error; |
| 955 | 948 | ||
| @@ -991,8 +984,8 @@ restart: | |||
| 991 | memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t)); | 984 | memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t)); |
| 992 | spin_unlock(&ls->ls_recover_spin); | 985 | spin_unlock(&ls->ls_recover_spin); |
| 993 | 986 | ||
| 994 | memset(lvb_bits, 0, sizeof(lvb_bits)); | 987 | memset(ls->ls_lvb_bits, 0, GDLM_LVB_SIZE); |
| 995 | control_lvb_write(ls, start_gen, lvb_bits); | 988 | control_lvb_write(ls, start_gen, ls->ls_lvb_bits); |
| 996 | 989 | ||
| 997 | error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT); | 990 | error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT); |
| 998 | if (error) | 991 | if (error) |
| @@ -1022,6 +1015,12 @@ static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots, | |||
| 1022 | uint32_t old_size, new_size; | 1015 | uint32_t old_size, new_size; |
| 1023 | int i, max_jid; | 1016 | int i, max_jid; |
| 1024 | 1017 | ||
| 1018 | if (!ls->ls_lvb_bits) { | ||
| 1019 | ls->ls_lvb_bits = kzalloc(GDLM_LVB_SIZE, GFP_NOFS); | ||
| 1020 | if (!ls->ls_lvb_bits) | ||
| 1021 | return -ENOMEM; | ||
| 1022 | } | ||
| 1023 | |||
| 1025 | max_jid = 0; | 1024 | max_jid = 0; |
| 1026 | for (i = 0; i < num_slots; i++) { | 1025 | for (i = 0; i < num_slots; i++) { |
| 1027 | if (max_jid < slots[i].slot - 1) | 1026 | if (max_jid < slots[i].slot - 1) |
| @@ -1057,6 +1056,7 @@ static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots, | |||
| 1057 | 1056 | ||
| 1058 | static void free_recover_size(struct lm_lockstruct *ls) | 1057 | static void free_recover_size(struct lm_lockstruct *ls) |
| 1059 | { | 1058 | { |
| 1059 | kfree(ls->ls_lvb_bits); | ||
| 1060 | kfree(ls->ls_recover_submit); | 1060 | kfree(ls->ls_recover_submit); |
| 1061 | kfree(ls->ls_recover_result); | 1061 | kfree(ls->ls_recover_result); |
| 1062 | ls->ls_recover_submit = NULL; | 1062 | ls->ls_recover_submit = NULL; |
| @@ -1205,6 +1205,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table) | |||
| 1205 | ls->ls_recover_size = 0; | 1205 | ls->ls_recover_size = 0; |
| 1206 | ls->ls_recover_submit = NULL; | 1206 | ls->ls_recover_submit = NULL; |
| 1207 | ls->ls_recover_result = NULL; | 1207 | ls->ls_recover_result = NULL; |
| 1208 | ls->ls_lvb_bits = NULL; | ||
| 1208 | 1209 | ||
| 1209 | error = set_recover_size(sdp, NULL, 0); | 1210 | error = set_recover_size(sdp, NULL, 0); |
| 1210 | if (error) | 1211 | if (error) |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index d1f51fd73f86..5a51265a4341 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
| @@ -576,7 +576,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) | |||
| 576 | RB_CLEAR_NODE(&ip->i_res->rs_node); | 576 | RB_CLEAR_NODE(&ip->i_res->rs_node); |
| 577 | out: | 577 | out: |
| 578 | up_write(&ip->i_rw_mutex); | 578 | up_write(&ip->i_rw_mutex); |
| 579 | return 0; | 579 | return error; |
| 580 | } | 580 | } |
| 581 | 581 | ||
| 582 | static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) | 582 | static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) |
| @@ -1181,12 +1181,9 @@ int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, | |||
| 1181 | const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed) | 1181 | const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed) |
| 1182 | { | 1182 | { |
| 1183 | struct super_block *sb = sdp->sd_vfs; | 1183 | struct super_block *sb = sdp->sd_vfs; |
| 1184 | struct block_device *bdev = sb->s_bdev; | ||
| 1185 | const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize / | ||
| 1186 | bdev_logical_block_size(sb->s_bdev); | ||
| 1187 | u64 blk; | 1184 | u64 blk; |
| 1188 | sector_t start = 0; | 1185 | sector_t start = 0; |
| 1189 | sector_t nr_sects = 0; | 1186 | sector_t nr_blks = 0; |
| 1190 | int rv; | 1187 | int rv; |
| 1191 | unsigned int x; | 1188 | unsigned int x; |
| 1192 | u32 trimmed = 0; | 1189 | u32 trimmed = 0; |
| @@ -1206,35 +1203,34 @@ int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, | |||
| 1206 | if (diff == 0) | 1203 | if (diff == 0) |
| 1207 | continue; | 1204 | continue; |
| 1208 | blk = offset + ((bi->bi_start + x) * GFS2_NBBY); | 1205 | blk = offset + ((bi->bi_start + x) * GFS2_NBBY); |
| 1209 | blk *= sects_per_blk; /* convert to sectors */ | ||
| 1210 | while(diff) { | 1206 | while(diff) { |
| 1211 | if (diff & 1) { | 1207 | if (diff & 1) { |
| 1212 | if (nr_sects == 0) | 1208 | if (nr_blks == 0) |
| 1213 | goto start_new_extent; | 1209 | goto start_new_extent; |
| 1214 | if ((start + nr_sects) != blk) { | 1210 | if ((start + nr_blks) != blk) { |
| 1215 | if (nr_sects >= minlen) { | 1211 | if (nr_blks >= minlen) { |
| 1216 | rv = blkdev_issue_discard(bdev, | 1212 | rv = sb_issue_discard(sb, |
| 1217 | start, nr_sects, | 1213 | start, nr_blks, |
| 1218 | GFP_NOFS, 0); | 1214 | GFP_NOFS, 0); |
| 1219 | if (rv) | 1215 | if (rv) |
| 1220 | goto fail; | 1216 | goto fail; |
| 1221 | trimmed += nr_sects; | 1217 | trimmed += nr_blks; |
| 1222 | } | 1218 | } |
| 1223 | nr_sects = 0; | 1219 | nr_blks = 0; |
| 1224 | start_new_extent: | 1220 | start_new_extent: |
| 1225 | start = blk; | 1221 | start = blk; |
| 1226 | } | 1222 | } |
| 1227 | nr_sects += sects_per_blk; | 1223 | nr_blks++; |
| 1228 | } | 1224 | } |
| 1229 | diff >>= 2; | 1225 | diff >>= 2; |
| 1230 | blk += sects_per_blk; | 1226 | blk++; |
| 1231 | } | 1227 | } |
| 1232 | } | 1228 | } |
| 1233 | if (nr_sects >= minlen) { | 1229 | if (nr_blks >= minlen) { |
| 1234 | rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0); | 1230 | rv = sb_issue_discard(sb, start, nr_blks, GFP_NOFS, 0); |
| 1235 | if (rv) | 1231 | if (rv) |
| 1236 | goto fail; | 1232 | goto fail; |
| 1237 | trimmed += nr_sects; | 1233 | trimmed += nr_blks; |
| 1238 | } | 1234 | } |
| 1239 | if (ptrimmed) | 1235 | if (ptrimmed) |
| 1240 | *ptrimmed = trimmed; | 1236 | *ptrimmed = trimmed; |
diff --git a/fs/internal.h b/fs/internal.h index 507141fceb99..4be78237d896 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
| @@ -125,3 +125,8 @@ extern int invalidate_inodes(struct super_block *, bool); | |||
| 125 | * dcache.c | 125 | * dcache.c |
| 126 | */ | 126 | */ |
| 127 | extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); | 127 | extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); |
| 128 | |||
| 129 | /* | ||
| 130 | * read_write.c | ||
| 131 | */ | ||
| 132 | extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); | ||
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index d6ee5aed56b1..325bc019ed88 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
| @@ -1065,9 +1065,12 @@ out: | |||
| 1065 | void jbd2_journal_set_triggers(struct buffer_head *bh, | 1065 | void jbd2_journal_set_triggers(struct buffer_head *bh, |
| 1066 | struct jbd2_buffer_trigger_type *type) | 1066 | struct jbd2_buffer_trigger_type *type) |
| 1067 | { | 1067 | { |
| 1068 | struct journal_head *jh = bh2jh(bh); | 1068 | struct journal_head *jh = jbd2_journal_grab_journal_head(bh); |
| 1069 | 1069 | ||
| 1070 | if (WARN_ON(!jh)) | ||
| 1071 | return; | ||
| 1070 | jh->b_triggers = type; | 1072 | jh->b_triggers = type; |
| 1073 | jbd2_journal_put_journal_head(jh); | ||
| 1071 | } | 1074 | } |
| 1072 | 1075 | ||
| 1073 | void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data, | 1076 | void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data, |
| @@ -1119,17 +1122,18 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | |||
| 1119 | { | 1122 | { |
| 1120 | transaction_t *transaction = handle->h_transaction; | 1123 | transaction_t *transaction = handle->h_transaction; |
| 1121 | journal_t *journal = transaction->t_journal; | 1124 | journal_t *journal = transaction->t_journal; |
| 1122 | struct journal_head *jh = bh2jh(bh); | 1125 | struct journal_head *jh; |
| 1123 | int ret = 0; | 1126 | int ret = 0; |
| 1124 | 1127 | ||
| 1125 | jbd_debug(5, "journal_head %p\n", jh); | ||
| 1126 | JBUFFER_TRACE(jh, "entry"); | ||
| 1127 | if (is_handle_aborted(handle)) | 1128 | if (is_handle_aborted(handle)) |
| 1128 | goto out; | 1129 | goto out; |
| 1129 | if (!buffer_jbd(bh)) { | 1130 | jh = jbd2_journal_grab_journal_head(bh); |
| 1131 | if (!jh) { | ||
| 1130 | ret = -EUCLEAN; | 1132 | ret = -EUCLEAN; |
| 1131 | goto out; | 1133 | goto out; |
| 1132 | } | 1134 | } |
| 1135 | jbd_debug(5, "journal_head %p\n", jh); | ||
| 1136 | JBUFFER_TRACE(jh, "entry"); | ||
| 1133 | 1137 | ||
| 1134 | jbd_lock_bh_state(bh); | 1138 | jbd_lock_bh_state(bh); |
| 1135 | 1139 | ||
| @@ -1220,6 +1224,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | |||
| 1220 | spin_unlock(&journal->j_list_lock); | 1224 | spin_unlock(&journal->j_list_lock); |
| 1221 | out_unlock_bh: | 1225 | out_unlock_bh: |
| 1222 | jbd_unlock_bh_state(bh); | 1226 | jbd_unlock_bh_state(bh); |
| 1227 | jbd2_journal_put_journal_head(jh); | ||
| 1223 | out: | 1228 | out: |
| 1224 | JBUFFER_TRACE(jh, "exit"); | 1229 | JBUFFER_TRACE(jh, "exit"); |
| 1225 | WARN_ON(ret); /* All errors are bugs, so dump the stack */ | 1230 | WARN_ON(ret); /* All errors are bugs, so dump the stack */ |
diff --git a/fs/namespace.c b/fs/namespace.c index 50ca17d3cb45..d581e45c0a9f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
| @@ -798,6 +798,10 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, | |||
| 798 | } | 798 | } |
| 799 | 799 | ||
| 800 | mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD; | 800 | mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD; |
| 801 | /* Don't allow unprivileged users to change mount flags */ | ||
| 802 | if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY)) | ||
| 803 | mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; | ||
| 804 | |||
| 801 | atomic_inc(&sb->s_active); | 805 | atomic_inc(&sb->s_active); |
| 802 | mnt->mnt.mnt_sb = sb; | 806 | mnt->mnt.mnt_sb = sb; |
| 803 | mnt->mnt.mnt_root = dget(root); | 807 | mnt->mnt.mnt_root = dget(root); |
| @@ -1713,6 +1717,9 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags) | |||
| 1713 | if (readonly_request == __mnt_is_readonly(mnt)) | 1717 | if (readonly_request == __mnt_is_readonly(mnt)) |
| 1714 | return 0; | 1718 | return 0; |
| 1715 | 1719 | ||
| 1720 | if (mnt->mnt_flags & MNT_LOCK_READONLY) | ||
| 1721 | return -EPERM; | ||
| 1722 | |||
| 1716 | if (readonly_request) | 1723 | if (readonly_request) |
| 1717 | error = mnt_make_readonly(real_mount(mnt)); | 1724 | error = mnt_make_readonly(real_mount(mnt)); |
| 1718 | else | 1725 | else |
| @@ -2339,7 +2346,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, | |||
| 2339 | /* First pass: copy the tree topology */ | 2346 | /* First pass: copy the tree topology */ |
| 2340 | copy_flags = CL_COPY_ALL | CL_EXPIRE; | 2347 | copy_flags = CL_COPY_ALL | CL_EXPIRE; |
| 2341 | if (user_ns != mnt_ns->user_ns) | 2348 | if (user_ns != mnt_ns->user_ns) |
| 2342 | copy_flags |= CL_SHARED_TO_SLAVE; | 2349 | copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED; |
| 2343 | new = copy_tree(old, old->mnt.mnt_root, copy_flags); | 2350 | new = copy_tree(old, old->mnt.mnt_root, copy_flags); |
| 2344 | if (IS_ERR(new)) { | 2351 | if (IS_ERR(new)) { |
| 2345 | up_write(&namespace_sem); | 2352 | up_write(&namespace_sem); |
| @@ -2732,6 +2739,51 @@ bool our_mnt(struct vfsmount *mnt) | |||
| 2732 | return check_mnt(real_mount(mnt)); | 2739 | return check_mnt(real_mount(mnt)); |
| 2733 | } | 2740 | } |
| 2734 | 2741 | ||
| 2742 | bool current_chrooted(void) | ||
| 2743 | { | ||
| 2744 | /* Does the current process have a non-standard root */ | ||
| 2745 | struct path ns_root; | ||
| 2746 | struct path fs_root; | ||
| 2747 | bool chrooted; | ||
| 2748 | |||
| 2749 | /* Find the namespace root */ | ||
| 2750 | ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt; | ||
| 2751 | ns_root.dentry = ns_root.mnt->mnt_root; | ||
| 2752 | path_get(&ns_root); | ||
| 2753 | while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root)) | ||
| 2754 | ; | ||
| 2755 | |||
| 2756 | get_fs_root(current->fs, &fs_root); | ||
| 2757 | |||
| 2758 | chrooted = !path_equal(&fs_root, &ns_root); | ||
| 2759 | |||
| 2760 | path_put(&fs_root); | ||
| 2761 | path_put(&ns_root); | ||
| 2762 | |||
| 2763 | return chrooted; | ||
| 2764 | } | ||
| 2765 | |||
| 2766 | void update_mnt_policy(struct user_namespace *userns) | ||
| 2767 | { | ||
| 2768 | struct mnt_namespace *ns = current->nsproxy->mnt_ns; | ||
| 2769 | struct mount *mnt; | ||
| 2770 | |||
| 2771 | down_read(&namespace_sem); | ||
| 2772 | list_for_each_entry(mnt, &ns->list, mnt_list) { | ||
| 2773 | switch (mnt->mnt.mnt_sb->s_magic) { | ||
| 2774 | case SYSFS_MAGIC: | ||
| 2775 | userns->may_mount_sysfs = true; | ||
| 2776 | break; | ||
| 2777 | case PROC_SUPER_MAGIC: | ||
| 2778 | userns->may_mount_proc = true; | ||
| 2779 | break; | ||
| 2780 | } | ||
| 2781 | if (userns->may_mount_sysfs && userns->may_mount_proc) | ||
| 2782 | break; | ||
| 2783 | } | ||
| 2784 | up_read(&namespace_sem); | ||
| 2785 | } | ||
| 2786 | |||
| 2735 | static void *mntns_get(struct task_struct *task) | 2787 | static void *mntns_get(struct task_struct *task) |
| 2736 | { | 2788 | { |
| 2737 | struct mnt_namespace *ns = NULL; | 2789 | struct mnt_namespace *ns = NULL; |
diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c index 737d839bc17b..6fc7b5cae92b 100644 --- a/fs/nfs/blocklayout/blocklayoutdm.c +++ b/fs/nfs/blocklayout/blocklayoutdm.c | |||
| @@ -55,7 +55,8 @@ static void dev_remove(struct net *net, dev_t dev) | |||
| 55 | 55 | ||
| 56 | bl_pipe_msg.bl_wq = &nn->bl_wq; | 56 | bl_pipe_msg.bl_wq = &nn->bl_wq; |
| 57 | memset(msg, 0, sizeof(*msg)); | 57 | memset(msg, 0, sizeof(*msg)); |
| 58 | msg->data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS); | 58 | msg->len = sizeof(bl_msg) + bl_msg.totallen; |
| 59 | msg->data = kzalloc(msg->len, GFP_NOFS); | ||
| 59 | if (!msg->data) | 60 | if (!msg->data) |
| 60 | goto out; | 61 | goto out; |
| 61 | 62 | ||
| @@ -66,7 +67,6 @@ static void dev_remove(struct net *net, dev_t dev) | |||
| 66 | memcpy(msg->data, &bl_msg, sizeof(bl_msg)); | 67 | memcpy(msg->data, &bl_msg, sizeof(bl_msg)); |
| 67 | dataptr = (uint8_t *) msg->data; | 68 | dataptr = (uint8_t *) msg->data; |
| 68 | memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request)); | 69 | memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request)); |
| 69 | msg->len = sizeof(bl_msg) + bl_msg.totallen; | ||
| 70 | 70 | ||
| 71 | add_wait_queue(&nn->bl_wq, &wq); | 71 | add_wait_queue(&nn->bl_wq, &wq); |
| 72 | if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) { | 72 | if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) { |
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index dc0f98dfa717..c516da5873fd 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c | |||
| @@ -726,9 +726,9 @@ out1: | |||
| 726 | return ret; | 726 | return ret; |
| 727 | } | 727 | } |
| 728 | 728 | ||
| 729 | static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data) | 729 | static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data, size_t datalen) |
| 730 | { | 730 | { |
| 731 | return key_instantiate_and_link(key, data, strlen(data) + 1, | 731 | return key_instantiate_and_link(key, data, datalen, |
| 732 | id_resolver_cache->thread_keyring, | 732 | id_resolver_cache->thread_keyring, |
| 733 | authkey); | 733 | authkey); |
| 734 | } | 734 | } |
| @@ -738,6 +738,7 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im, | |||
| 738 | struct key *key, struct key *authkey) | 738 | struct key *key, struct key *authkey) |
| 739 | { | 739 | { |
| 740 | char id_str[NFS_UINT_MAXLEN]; | 740 | char id_str[NFS_UINT_MAXLEN]; |
| 741 | size_t len; | ||
| 741 | int ret = -ENOKEY; | 742 | int ret = -ENOKEY; |
| 742 | 743 | ||
| 743 | /* ret = -ENOKEY */ | 744 | /* ret = -ENOKEY */ |
| @@ -747,13 +748,15 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im, | |||
| 747 | case IDMAP_CONV_NAMETOID: | 748 | case IDMAP_CONV_NAMETOID: |
| 748 | if (strcmp(upcall->im_name, im->im_name) != 0) | 749 | if (strcmp(upcall->im_name, im->im_name) != 0) |
| 749 | break; | 750 | break; |
| 750 | sprintf(id_str, "%d", im->im_id); | 751 | /* Note: here we store the NUL terminator too */ |
| 751 | ret = nfs_idmap_instantiate(key, authkey, id_str); | 752 | len = sprintf(id_str, "%d", im->im_id) + 1; |
| 753 | ret = nfs_idmap_instantiate(key, authkey, id_str, len); | ||
| 752 | break; | 754 | break; |
| 753 | case IDMAP_CONV_IDTONAME: | 755 | case IDMAP_CONV_IDTONAME: |
| 754 | if (upcall->im_id != im->im_id) | 756 | if (upcall->im_id != im->im_id) |
| 755 | break; | 757 | break; |
| 756 | ret = nfs_idmap_instantiate(key, authkey, im->im_name); | 758 | len = strlen(im->im_name); |
| 759 | ret = nfs_idmap_instantiate(key, authkey, im->im_name, len); | ||
| 757 | break; | 760 | break; |
| 758 | default: | 761 | default: |
| 759 | ret = -EINVAL; | 762 | ret = -EINVAL; |
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 49eeb044c109..4fb234d3aefb 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c | |||
| @@ -129,7 +129,6 @@ static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo) | |||
| 129 | { | 129 | { |
| 130 | if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) | 130 | if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) |
| 131 | return; | 131 | return; |
| 132 | clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags); | ||
| 133 | pnfs_return_layout(inode); | 132 | pnfs_return_layout(inode); |
| 134 | } | 133 | } |
| 135 | 134 | ||
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b2671cb0f901..26431cf62ddb 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
| @@ -2632,7 +2632,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | |||
| 2632 | int status; | 2632 | int status; |
| 2633 | 2633 | ||
| 2634 | if (pnfs_ld_layoutret_on_setattr(inode)) | 2634 | if (pnfs_ld_layoutret_on_setattr(inode)) |
| 2635 | pnfs_return_layout(inode); | 2635 | pnfs_commit_and_return_layout(inode); |
| 2636 | 2636 | ||
| 2637 | nfs_fattr_init(fattr); | 2637 | nfs_fattr_init(fattr); |
| 2638 | 2638 | ||
| @@ -6416,22 +6416,8 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata) | |||
| 6416 | static void nfs4_layoutcommit_release(void *calldata) | 6416 | static void nfs4_layoutcommit_release(void *calldata) |
| 6417 | { | 6417 | { |
| 6418 | struct nfs4_layoutcommit_data *data = calldata; | 6418 | struct nfs4_layoutcommit_data *data = calldata; |
| 6419 | struct pnfs_layout_segment *lseg, *tmp; | ||
| 6420 | unsigned long *bitlock = &NFS_I(data->args.inode)->flags; | ||
| 6421 | 6419 | ||
| 6422 | pnfs_cleanup_layoutcommit(data); | 6420 | pnfs_cleanup_layoutcommit(data); |
| 6423 | /* Matched by references in pnfs_set_layoutcommit */ | ||
| 6424 | list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) { | ||
| 6425 | list_del_init(&lseg->pls_lc_list); | ||
| 6426 | if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, | ||
| 6427 | &lseg->pls_flags)) | ||
| 6428 | pnfs_put_lseg(lseg); | ||
| 6429 | } | ||
| 6430 | |||
| 6431 | clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); | ||
| 6432 | smp_mb__after_clear_bit(); | ||
| 6433 | wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING); | ||
| 6434 | |||
| 6435 | put_rpccred(data->cred); | 6421 | put_rpccred(data->cred); |
| 6436 | kfree(data); | 6422 | kfree(data); |
| 6437 | } | 6423 | } |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 48ac5aad6258..4bdffe0ba025 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
| @@ -417,6 +417,16 @@ should_free_lseg(struct pnfs_layout_range *lseg_range, | |||
| 417 | lo_seg_intersecting(lseg_range, recall_range); | 417 | lo_seg_intersecting(lseg_range, recall_range); |
| 418 | } | 418 | } |
| 419 | 419 | ||
| 420 | static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, | ||
| 421 | struct list_head *tmp_list) | ||
| 422 | { | ||
| 423 | if (!atomic_dec_and_test(&lseg->pls_refcount)) | ||
| 424 | return false; | ||
| 425 | pnfs_layout_remove_lseg(lseg->pls_layout, lseg); | ||
| 426 | list_add(&lseg->pls_list, tmp_list); | ||
| 427 | return true; | ||
| 428 | } | ||
| 429 | |||
| 420 | /* Returns 1 if lseg is removed from list, 0 otherwise */ | 430 | /* Returns 1 if lseg is removed from list, 0 otherwise */ |
| 421 | static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, | 431 | static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, |
| 422 | struct list_head *tmp_list) | 432 | struct list_head *tmp_list) |
| @@ -430,11 +440,8 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, | |||
| 430 | */ | 440 | */ |
| 431 | dprintk("%s: lseg %p ref %d\n", __func__, lseg, | 441 | dprintk("%s: lseg %p ref %d\n", __func__, lseg, |
| 432 | atomic_read(&lseg->pls_refcount)); | 442 | atomic_read(&lseg->pls_refcount)); |
| 433 | if (atomic_dec_and_test(&lseg->pls_refcount)) { | 443 | if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list)) |
| 434 | pnfs_layout_remove_lseg(lseg->pls_layout, lseg); | ||
| 435 | list_add(&lseg->pls_list, tmp_list); | ||
| 436 | rv = 1; | 444 | rv = 1; |
| 437 | } | ||
| 438 | } | 445 | } |
| 439 | return rv; | 446 | return rv; |
| 440 | } | 447 | } |
| @@ -777,6 +784,21 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||
| 777 | return lseg; | 784 | return lseg; |
| 778 | } | 785 | } |
| 779 | 786 | ||
| 787 | static void pnfs_clear_layoutcommit(struct inode *inode, | ||
| 788 | struct list_head *head) | ||
| 789 | { | ||
| 790 | struct nfs_inode *nfsi = NFS_I(inode); | ||
| 791 | struct pnfs_layout_segment *lseg, *tmp; | ||
| 792 | |||
| 793 | if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) | ||
| 794 | return; | ||
| 795 | list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) { | ||
| 796 | if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) | ||
| 797 | continue; | ||
| 798 | pnfs_lseg_dec_and_remove_zero(lseg, head); | ||
| 799 | } | ||
| 800 | } | ||
| 801 | |||
| 780 | /* | 802 | /* |
| 781 | * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr | 803 | * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr |
| 782 | * when the layout segment list is empty. | 804 | * when the layout segment list is empty. |
| @@ -808,6 +830,7 @@ _pnfs_return_layout(struct inode *ino) | |||
| 808 | /* Reference matched in nfs4_layoutreturn_release */ | 830 | /* Reference matched in nfs4_layoutreturn_release */ |
| 809 | pnfs_get_layout_hdr(lo); | 831 | pnfs_get_layout_hdr(lo); |
| 810 | empty = list_empty(&lo->plh_segs); | 832 | empty = list_empty(&lo->plh_segs); |
| 833 | pnfs_clear_layoutcommit(ino, &tmp_list); | ||
| 811 | pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); | 834 | pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); |
| 812 | /* Don't send a LAYOUTRETURN if list was initially empty */ | 835 | /* Don't send a LAYOUTRETURN if list was initially empty */ |
| 813 | if (empty) { | 836 | if (empty) { |
| @@ -820,8 +843,6 @@ _pnfs_return_layout(struct inode *ino) | |||
| 820 | spin_unlock(&ino->i_lock); | 843 | spin_unlock(&ino->i_lock); |
| 821 | pnfs_free_lseg_list(&tmp_list); | 844 | pnfs_free_lseg_list(&tmp_list); |
| 822 | 845 | ||
| 823 | WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)); | ||
| 824 | |||
| 825 | lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); | 846 | lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); |
| 826 | if (unlikely(lrp == NULL)) { | 847 | if (unlikely(lrp == NULL)) { |
| 827 | status = -ENOMEM; | 848 | status = -ENOMEM; |
| @@ -845,6 +866,33 @@ out: | |||
| 845 | } | 866 | } |
| 846 | EXPORT_SYMBOL_GPL(_pnfs_return_layout); | 867 | EXPORT_SYMBOL_GPL(_pnfs_return_layout); |
| 847 | 868 | ||
| 869 | int | ||
| 870 | pnfs_commit_and_return_layout(struct inode *inode) | ||
| 871 | { | ||
| 872 | struct pnfs_layout_hdr *lo; | ||
| 873 | int ret; | ||
| 874 | |||
| 875 | spin_lock(&inode->i_lock); | ||
| 876 | lo = NFS_I(inode)->layout; | ||
| 877 | if (lo == NULL) { | ||
| 878 | spin_unlock(&inode->i_lock); | ||
| 879 | return 0; | ||
| 880 | } | ||
| 881 | pnfs_get_layout_hdr(lo); | ||
| 882 | /* Block new layoutgets and read/write to ds */ | ||
| 883 | lo->plh_block_lgets++; | ||
| 884 | spin_unlock(&inode->i_lock); | ||
| 885 | filemap_fdatawait(inode->i_mapping); | ||
| 886 | ret = pnfs_layoutcommit_inode(inode, true); | ||
| 887 | if (ret == 0) | ||
| 888 | ret = _pnfs_return_layout(inode); | ||
| 889 | spin_lock(&inode->i_lock); | ||
| 890 | lo->plh_block_lgets--; | ||
| 891 | spin_unlock(&inode->i_lock); | ||
| 892 | pnfs_put_layout_hdr(lo); | ||
| 893 | return ret; | ||
| 894 | } | ||
| 895 | |||
| 848 | bool pnfs_roc(struct inode *ino) | 896 | bool pnfs_roc(struct inode *ino) |
| 849 | { | 897 | { |
| 850 | struct pnfs_layout_hdr *lo; | 898 | struct pnfs_layout_hdr *lo; |
| @@ -1458,7 +1506,6 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data) | |||
| 1458 | dprintk("pnfs write error = %d\n", hdr->pnfs_error); | 1506 | dprintk("pnfs write error = %d\n", hdr->pnfs_error); |
| 1459 | if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & | 1507 | if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & |
| 1460 | PNFS_LAYOUTRET_ON_ERROR) { | 1508 | PNFS_LAYOUTRET_ON_ERROR) { |
| 1461 | clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags); | ||
| 1462 | pnfs_return_layout(hdr->inode); | 1509 | pnfs_return_layout(hdr->inode); |
| 1463 | } | 1510 | } |
| 1464 | if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) | 1511 | if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) |
| @@ -1613,7 +1660,6 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data) | |||
| 1613 | dprintk("pnfs read error = %d\n", hdr->pnfs_error); | 1660 | dprintk("pnfs read error = %d\n", hdr->pnfs_error); |
| 1614 | if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & | 1661 | if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & |
| 1615 | PNFS_LAYOUTRET_ON_ERROR) { | 1662 | PNFS_LAYOUTRET_ON_ERROR) { |
| 1616 | clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags); | ||
| 1617 | pnfs_return_layout(hdr->inode); | 1663 | pnfs_return_layout(hdr->inode); |
| 1618 | } | 1664 | } |
| 1619 | if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) | 1665 | if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) |
| @@ -1746,11 +1792,27 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp) | |||
| 1746 | 1792 | ||
| 1747 | list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { | 1793 | list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { |
| 1748 | if (lseg->pls_range.iomode == IOMODE_RW && | 1794 | if (lseg->pls_range.iomode == IOMODE_RW && |
| 1749 | test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) | 1795 | test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) |
| 1750 | list_add(&lseg->pls_lc_list, listp); | 1796 | list_add(&lseg->pls_lc_list, listp); |
| 1751 | } | 1797 | } |
| 1752 | } | 1798 | } |
| 1753 | 1799 | ||
| 1800 | static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp) | ||
| 1801 | { | ||
| 1802 | struct pnfs_layout_segment *lseg, *tmp; | ||
| 1803 | unsigned long *bitlock = &NFS_I(inode)->flags; | ||
| 1804 | |||
| 1805 | /* Matched by references in pnfs_set_layoutcommit */ | ||
| 1806 | list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) { | ||
| 1807 | list_del_init(&lseg->pls_lc_list); | ||
| 1808 | pnfs_put_lseg(lseg); | ||
| 1809 | } | ||
| 1810 | |||
| 1811 | clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); | ||
| 1812 | smp_mb__after_clear_bit(); | ||
| 1813 | wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING); | ||
| 1814 | } | ||
| 1815 | |||
| 1754 | void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) | 1816 | void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) |
| 1755 | { | 1817 | { |
| 1756 | pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode); | 1818 | pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode); |
| @@ -1795,6 +1857,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) | |||
| 1795 | 1857 | ||
| 1796 | if (nfss->pnfs_curr_ld->cleanup_layoutcommit) | 1858 | if (nfss->pnfs_curr_ld->cleanup_layoutcommit) |
| 1797 | nfss->pnfs_curr_ld->cleanup_layoutcommit(data); | 1859 | nfss->pnfs_curr_ld->cleanup_layoutcommit(data); |
| 1860 | pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list); | ||
| 1798 | } | 1861 | } |
| 1799 | 1862 | ||
| 1800 | /* | 1863 | /* |
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 94ba80417748..f5f8a470a647 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
| @@ -219,6 +219,7 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata); | |||
| 219 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); | 219 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); |
| 220 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); | 220 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); |
| 221 | int _pnfs_return_layout(struct inode *); | 221 | int _pnfs_return_layout(struct inode *); |
| 222 | int pnfs_commit_and_return_layout(struct inode *); | ||
| 222 | void pnfs_ld_write_done(struct nfs_write_data *); | 223 | void pnfs_ld_write_done(struct nfs_write_data *); |
| 223 | void pnfs_ld_read_done(struct nfs_read_data *); | 224 | void pnfs_ld_read_done(struct nfs_read_data *); |
| 224 | struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, | 225 | struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, |
| @@ -407,6 +408,11 @@ static inline int pnfs_return_layout(struct inode *ino) | |||
| 407 | return 0; | 408 | return 0; |
| 408 | } | 409 | } |
| 409 | 410 | ||
| 411 | static inline int pnfs_commit_and_return_layout(struct inode *inode) | ||
| 412 | { | ||
| 413 | return 0; | ||
| 414 | } | ||
| 415 | |||
| 410 | static inline bool | 416 | static inline bool |
| 411 | pnfs_ld_layoutret_on_setattr(struct inode *inode) | 417 | pnfs_ld_layoutret_on_setattr(struct inode *inode) |
| 412 | { | 418 | { |
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 01168865dd37..a2720071f282 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
| @@ -264,7 +264,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, | |||
| 264 | iattr->ia_valid |= ATTR_SIZE; | 264 | iattr->ia_valid |= ATTR_SIZE; |
| 265 | } | 265 | } |
| 266 | if (bmval[0] & FATTR4_WORD0_ACL) { | 266 | if (bmval[0] & FATTR4_WORD0_ACL) { |
| 267 | int nace; | 267 | u32 nace; |
| 268 | struct nfs4_ace *ace; | 268 | struct nfs4_ace *ace; |
| 269 | 269 | ||
| 270 | READ_BUF(4); len += 4; | 270 | READ_BUF(4); len += 4; |
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 62c1ee128aeb..ca05f6dc3544 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c | |||
| @@ -102,7 +102,8 @@ nfsd_reply_cache_free_locked(struct svc_cacherep *rp) | |||
| 102 | { | 102 | { |
| 103 | if (rp->c_type == RC_REPLBUFF) | 103 | if (rp->c_type == RC_REPLBUFF) |
| 104 | kfree(rp->c_replvec.iov_base); | 104 | kfree(rp->c_replvec.iov_base); |
| 105 | hlist_del(&rp->c_hash); | 105 | if (!hlist_unhashed(&rp->c_hash)) |
| 106 | hlist_del(&rp->c_hash); | ||
| 106 | list_del(&rp->c_lru); | 107 | list_del(&rp->c_lru); |
| 107 | --num_drc_entries; | 108 | --num_drc_entries; |
| 108 | kmem_cache_free(drc_slab, rp); | 109 | kmem_cache_free(drc_slab, rp); |
| @@ -118,6 +119,10 @@ nfsd_reply_cache_free(struct svc_cacherep *rp) | |||
| 118 | 119 | ||
| 119 | int nfsd_reply_cache_init(void) | 120 | int nfsd_reply_cache_init(void) |
| 120 | { | 121 | { |
| 122 | INIT_LIST_HEAD(&lru_head); | ||
| 123 | max_drc_entries = nfsd_cache_size_limit(); | ||
| 124 | num_drc_entries = 0; | ||
| 125 | |||
| 121 | register_shrinker(&nfsd_reply_cache_shrinker); | 126 | register_shrinker(&nfsd_reply_cache_shrinker); |
| 122 | drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), | 127 | drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), |
| 123 | 0, 0, NULL); | 128 | 0, 0, NULL); |
| @@ -128,10 +133,6 @@ int nfsd_reply_cache_init(void) | |||
| 128 | if (!cache_hash) | 133 | if (!cache_hash) |
| 129 | goto out_nomem; | 134 | goto out_nomem; |
| 130 | 135 | ||
| 131 | INIT_LIST_HEAD(&lru_head); | ||
| 132 | max_drc_entries = nfsd_cache_size_limit(); | ||
| 133 | num_drc_entries = 0; | ||
| 134 | |||
| 135 | return 0; | 136 | return 0; |
| 136 | out_nomem: | 137 | out_nomem: |
| 137 | printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); | 138 | printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 2a7eb536de0b..2b2e2396a869 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
| @@ -1013,6 +1013,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
| 1013 | int host_err; | 1013 | int host_err; |
| 1014 | int stable = *stablep; | 1014 | int stable = *stablep; |
| 1015 | int use_wgather; | 1015 | int use_wgather; |
| 1016 | loff_t pos = offset; | ||
| 1016 | 1017 | ||
| 1017 | dentry = file->f_path.dentry; | 1018 | dentry = file->f_path.dentry; |
| 1018 | inode = dentry->d_inode; | 1019 | inode = dentry->d_inode; |
| @@ -1025,7 +1026,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
| 1025 | 1026 | ||
| 1026 | /* Write the data. */ | 1027 | /* Write the data. */ |
| 1027 | oldfs = get_fs(); set_fs(KERNEL_DS); | 1028 | oldfs = get_fs(); set_fs(KERNEL_DS); |
| 1028 | host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); | 1029 | host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos); |
| 1029 | set_fs(oldfs); | 1030 | set_fs(oldfs); |
| 1030 | if (host_err < 0) | 1031 | if (host_err < 0) |
| 1031 | goto out_nfserr; | 1032 | goto out_nfserr; |
diff --git a/fs/pnode.c b/fs/pnode.c index 3e000a51ac0d..8b29d2164da6 100644 --- a/fs/pnode.c +++ b/fs/pnode.c | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include <linux/mnt_namespace.h> | 9 | #include <linux/mnt_namespace.h> |
| 10 | #include <linux/mount.h> | 10 | #include <linux/mount.h> |
| 11 | #include <linux/fs.h> | 11 | #include <linux/fs.h> |
| 12 | #include <linux/nsproxy.h> | ||
| 12 | #include "internal.h" | 13 | #include "internal.h" |
| 13 | #include "pnode.h" | 14 | #include "pnode.h" |
| 14 | 15 | ||
| @@ -220,6 +221,7 @@ static struct mount *get_source(struct mount *dest, | |||
| 220 | int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, | 221 | int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, |
| 221 | struct mount *source_mnt, struct list_head *tree_list) | 222 | struct mount *source_mnt, struct list_head *tree_list) |
| 222 | { | 223 | { |
| 224 | struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; | ||
| 223 | struct mount *m, *child; | 225 | struct mount *m, *child; |
| 224 | int ret = 0; | 226 | int ret = 0; |
| 225 | struct mount *prev_dest_mnt = dest_mnt; | 227 | struct mount *prev_dest_mnt = dest_mnt; |
| @@ -237,6 +239,10 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, | |||
| 237 | 239 | ||
| 238 | source = get_source(m, prev_dest_mnt, prev_src_mnt, &type); | 240 | source = get_source(m, prev_dest_mnt, prev_src_mnt, &type); |
| 239 | 241 | ||
| 242 | /* Notice when we are propagating across user namespaces */ | ||
| 243 | if (m->mnt_ns->user_ns != user_ns) | ||
| 244 | type |= CL_UNPRIVILEGED; | ||
| 245 | |||
| 240 | child = copy_tree(source, source->mnt.mnt_root, type); | 246 | child = copy_tree(source, source->mnt.mnt_root, type); |
| 241 | if (IS_ERR(child)) { | 247 | if (IS_ERR(child)) { |
| 242 | ret = PTR_ERR(child); | 248 | ret = PTR_ERR(child); |
diff --git a/fs/pnode.h b/fs/pnode.h index 19b853a3445c..a0493d5ebfbf 100644 --- a/fs/pnode.h +++ b/fs/pnode.h | |||
| @@ -23,6 +23,7 @@ | |||
| 23 | #define CL_MAKE_SHARED 0x08 | 23 | #define CL_MAKE_SHARED 0x08 |
| 24 | #define CL_PRIVATE 0x10 | 24 | #define CL_PRIVATE 0x10 |
| 25 | #define CL_SHARED_TO_SLAVE 0x20 | 25 | #define CL_SHARED_TO_SLAVE 0x20 |
| 26 | #define CL_UNPRIVILEGED 0x40 | ||
| 26 | 27 | ||
| 27 | static inline void set_mnt_shared(struct mount *mnt) | 28 | static inline void set_mnt_shared(struct mount *mnt) |
| 28 | { | 29 | { |
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index a86aebc9ba7c..869116c2afbe 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
| @@ -446,9 +446,10 @@ static const struct file_operations proc_reg_file_ops_no_compat = { | |||
| 446 | 446 | ||
| 447 | struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) | 447 | struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) |
| 448 | { | 448 | { |
| 449 | struct inode *inode = iget_locked(sb, de->low_ino); | 449 | struct inode *inode = new_inode_pseudo(sb); |
| 450 | 450 | ||
| 451 | if (inode && (inode->i_state & I_NEW)) { | 451 | if (inode) { |
| 452 | inode->i_ino = de->low_ino; | ||
| 452 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 453 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
| 453 | PROC_I(inode)->pde = de; | 454 | PROC_I(inode)->pde = de; |
| 454 | 455 | ||
| @@ -476,7 +477,6 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) | |||
| 476 | inode->i_fop = de->proc_fops; | 477 | inode->i_fop = de->proc_fops; |
| 477 | } | 478 | } |
| 478 | } | 479 | } |
| 479 | unlock_new_inode(inode); | ||
| 480 | } else | 480 | } else |
| 481 | pde_put(de); | 481 | pde_put(de); |
| 482 | return inode; | 482 | return inode; |
diff --git a/fs/proc/root.c b/fs/proc/root.c index c6e9fac26bac..9c7fab1d23f0 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #include <linux/sched.h> | 16 | #include <linux/sched.h> |
| 17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
| 18 | #include <linux/bitops.h> | 18 | #include <linux/bitops.h> |
| 19 | #include <linux/user_namespace.h> | ||
| 19 | #include <linux/mount.h> | 20 | #include <linux/mount.h> |
| 20 | #include <linux/pid_namespace.h> | 21 | #include <linux/pid_namespace.h> |
| 21 | #include <linux/parser.h> | 22 | #include <linux/parser.h> |
| @@ -108,6 +109,9 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, | |||
| 108 | } else { | 109 | } else { |
| 109 | ns = task_active_pid_ns(current); | 110 | ns = task_active_pid_ns(current); |
| 110 | options = data; | 111 | options = data; |
| 112 | |||
| 113 | if (!current_user_ns()->may_mount_proc) | ||
| 114 | return ERR_PTR(-EPERM); | ||
| 111 | } | 115 | } |
| 112 | 116 | ||
| 113 | sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns); | 117 | sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns); |
diff --git a/fs/read_write.c b/fs/read_write.c index a698eff457fb..e6ddc8dceb96 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include <linux/splice.h> | 17 | #include <linux/splice.h> |
| 18 | #include <linux/compat.h> | 18 | #include <linux/compat.h> |
| 19 | #include "read_write.h" | 19 | #include "read_write.h" |
| 20 | #include "internal.h" | ||
| 20 | 21 | ||
| 21 | #include <asm/uaccess.h> | 22 | #include <asm/uaccess.h> |
| 22 | #include <asm/unistd.h> | 23 | #include <asm/unistd.h> |
| @@ -417,6 +418,33 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof | |||
| 417 | 418 | ||
| 418 | EXPORT_SYMBOL(do_sync_write); | 419 | EXPORT_SYMBOL(do_sync_write); |
| 419 | 420 | ||
| 421 | ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos) | ||
| 422 | { | ||
| 423 | mm_segment_t old_fs; | ||
| 424 | const char __user *p; | ||
| 425 | ssize_t ret; | ||
| 426 | |||
| 427 | if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) | ||
| 428 | return -EINVAL; | ||
| 429 | |||
| 430 | old_fs = get_fs(); | ||
| 431 | set_fs(get_ds()); | ||
| 432 | p = (__force const char __user *)buf; | ||
| 433 | if (count > MAX_RW_COUNT) | ||
| 434 | count = MAX_RW_COUNT; | ||
| 435 | if (file->f_op->write) | ||
| 436 | ret = file->f_op->write(file, p, count, pos); | ||
| 437 | else | ||
| 438 | ret = do_sync_write(file, p, count, pos); | ||
| 439 | set_fs(old_fs); | ||
| 440 | if (ret > 0) { | ||
| 441 | fsnotify_modify(file); | ||
| 442 | add_wchar(current, ret); | ||
| 443 | } | ||
| 444 | inc_syscw(current); | ||
| 445 | return ret; | ||
| 446 | } | ||
| 447 | |||
| 420 | ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) | 448 | ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) |
| 421 | { | 449 | { |
| 422 | ssize_t ret; | 450 | ssize_t ret; |
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index c196369fe408..4cce1d9552fb 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c | |||
| @@ -187,8 +187,8 @@ fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset, | |||
| 187 | if (dbuf->count == ARRAY_SIZE(dbuf->dentries)) | 187 | if (dbuf->count == ARRAY_SIZE(dbuf->dentries)) |
| 188 | return -ENOSPC; | 188 | return -ENOSPC; |
| 189 | 189 | ||
| 190 | if (name[0] == '.' && (name[1] == '\0' || | 190 | if (name[0] == '.' && (namelen < 2 || |
| 191 | (name[1] == '.' && name[2] == '\0'))) | 191 | (namelen == 2 && name[1] == '.'))) |
| 192 | return 0; | 192 | return 0; |
| 193 | 193 | ||
| 194 | dentry = lookup_one_len(name, dbuf->xadir, namelen); | 194 | dentry = lookup_one_len(name, dbuf->xadir, namelen); |
diff --git a/fs/splice.c b/fs/splice.c index 718bd0056384..29e394e49ddd 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
| @@ -31,6 +31,7 @@ | |||
| 31 | #include <linux/security.h> | 31 | #include <linux/security.h> |
| 32 | #include <linux/gfp.h> | 32 | #include <linux/gfp.h> |
| 33 | #include <linux/socket.h> | 33 | #include <linux/socket.h> |
| 34 | #include "internal.h" | ||
| 34 | 35 | ||
| 35 | /* | 36 | /* |
| 36 | * Attempt to steal a page from a pipe buffer. This should perhaps go into | 37 | * Attempt to steal a page from a pipe buffer. This should perhaps go into |
| @@ -1048,9 +1049,10 @@ static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf, | |||
| 1048 | { | 1049 | { |
| 1049 | int ret; | 1050 | int ret; |
| 1050 | void *data; | 1051 | void *data; |
| 1052 | loff_t tmp = sd->pos; | ||
| 1051 | 1053 | ||
| 1052 | data = buf->ops->map(pipe, buf, 0); | 1054 | data = buf->ops->map(pipe, buf, 0); |
| 1053 | ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos); | 1055 | ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp); |
| 1054 | buf->ops->unmap(pipe, buf, data); | 1056 | buf->ops->unmap(pipe, buf, data); |
| 1055 | 1057 | ||
| 1056 | return ret; | 1058 | return ret; |
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 2fbdff6be25c..e14512678c9b 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c | |||
| @@ -1020,6 +1020,8 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
| 1020 | ino = parent_sd->s_ino; | 1020 | ino = parent_sd->s_ino; |
| 1021 | if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0) | 1021 | if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0) |
| 1022 | filp->f_pos++; | 1022 | filp->f_pos++; |
| 1023 | else | ||
| 1024 | return 0; | ||
| 1023 | } | 1025 | } |
| 1024 | if (filp->f_pos == 1) { | 1026 | if (filp->f_pos == 1) { |
| 1025 | if (parent_sd->s_parent) | 1027 | if (parent_sd->s_parent) |
| @@ -1028,6 +1030,8 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
| 1028 | ino = parent_sd->s_ino; | 1030 | ino = parent_sd->s_ino; |
| 1029 | if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) == 0) | 1031 | if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) == 0) |
| 1030 | filp->f_pos++; | 1032 | filp->f_pos++; |
| 1033 | else | ||
| 1034 | return 0; | ||
| 1031 | } | 1035 | } |
| 1032 | mutex_lock(&sysfs_mutex); | 1036 | mutex_lock(&sysfs_mutex); |
| 1033 | for (pos = sysfs_dir_pos(ns, parent_sd, filp->f_pos, pos); | 1037 | for (pos = sysfs_dir_pos(ns, parent_sd, filp->f_pos, pos); |
| @@ -1058,10 +1062,21 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
| 1058 | return 0; | 1062 | return 0; |
| 1059 | } | 1063 | } |
| 1060 | 1064 | ||
| 1065 | static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence) | ||
| 1066 | { | ||
| 1067 | struct inode *inode = file_inode(file); | ||
| 1068 | loff_t ret; | ||
| 1069 | |||
| 1070 | mutex_lock(&inode->i_mutex); | ||
| 1071 | ret = generic_file_llseek(file, offset, whence); | ||
| 1072 | mutex_unlock(&inode->i_mutex); | ||
| 1073 | |||
| 1074 | return ret; | ||
| 1075 | } | ||
| 1061 | 1076 | ||
| 1062 | const struct file_operations sysfs_dir_operations = { | 1077 | const struct file_operations sysfs_dir_operations = { |
| 1063 | .read = generic_read_dir, | 1078 | .read = generic_read_dir, |
| 1064 | .readdir = sysfs_readdir, | 1079 | .readdir = sysfs_readdir, |
| 1065 | .release = sysfs_dir_release, | 1080 | .release = sysfs_dir_release, |
| 1066 | .llseek = generic_file_llseek, | 1081 | .llseek = sysfs_dir_llseek, |
| 1067 | }; | 1082 | }; |
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 8d924b5ec733..afd83273e6ce 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include <linux/module.h> | 19 | #include <linux/module.h> |
| 20 | #include <linux/magic.h> | 20 | #include <linux/magic.h> |
| 21 | #include <linux/slab.h> | 21 | #include <linux/slab.h> |
| 22 | #include <linux/user_namespace.h> | ||
| 22 | 23 | ||
| 23 | #include "sysfs.h" | 24 | #include "sysfs.h" |
| 24 | 25 | ||
| @@ -111,6 +112,9 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, | |||
| 111 | struct super_block *sb; | 112 | struct super_block *sb; |
| 112 | int error; | 113 | int error; |
| 113 | 114 | ||
| 115 | if (!(flags & MS_KERNMOUNT) && !current_user_ns()->may_mount_sysfs) | ||
| 116 | return ERR_PTR(-EPERM); | ||
| 117 | |||
| 114 | info = kzalloc(sizeof(*info), GFP_KERNEL); | 118 | info = kzalloc(sizeof(*info), GFP_KERNEL); |
| 115 | if (!info) | 119 | if (!info) |
| 116 | return ERR_PTR(-ENOMEM); | 120 | return ERR_PTR(-ENOMEM); |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index ac838b844936..f21acf0ef01f 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
| @@ -1568,6 +1568,12 @@ static int ubifs_remount_rw(struct ubifs_info *c) | |||
| 1568 | c->remounting_rw = 1; | 1568 | c->remounting_rw = 1; |
| 1569 | c->ro_mount = 0; | 1569 | c->ro_mount = 0; |
| 1570 | 1570 | ||
| 1571 | if (c->space_fixup) { | ||
| 1572 | err = ubifs_fixup_free_space(c); | ||
| 1573 | if (err) | ||
| 1574 | return err; | ||
| 1575 | } | ||
| 1576 | |||
| 1571 | err = check_free_space(c); | 1577 | err = check_free_space(c); |
| 1572 | if (err) | 1578 | if (err) |
| 1573 | goto out; | 1579 | goto out; |
| @@ -1684,12 +1690,6 @@ static int ubifs_remount_rw(struct ubifs_info *c) | |||
| 1684 | err = dbg_check_space_info(c); | 1690 | err = dbg_check_space_info(c); |
| 1685 | } | 1691 | } |
| 1686 | 1692 | ||
| 1687 | if (c->space_fixup) { | ||
| 1688 | err = ubifs_fixup_free_space(c); | ||
| 1689 | if (err) | ||
| 1690 | goto out; | ||
| 1691 | } | ||
| 1692 | |||
| 1693 | mutex_unlock(&c->umount_mutex); | 1693 | mutex_unlock(&c->umount_mutex); |
| 1694 | return err; | 1694 | return err; |
| 1695 | 1695 | ||
