Diffstat (limited to 'fs')
 62 files changed, 3745 insertions(+), 1522 deletions(-)
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 89b6ce3634fd..c0ddfd29c5e5 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -7,7 +7,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
 	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
 	   export.o tree-log.o free-space-cache.o zlib.o lzo.o \
-	   compression.o delayed-ref.o relocation.o delayed-inode.o backref.o \
-	   scrub.o
+	   compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
+	   reada.o backref.o
 
 btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index eb159aaa5a11..89b156d85d63 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -59,22 +59,19 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 		if (!value)
 			return ERR_PTR(-ENOMEM);
 		size = __btrfs_getxattr(inode, name, value, size);
-		if (size > 0) {
-			acl = posix_acl_from_xattr(value, size);
-			if (IS_ERR(acl)) {
-				kfree(value);
-				return acl;
-			}
-			set_cached_acl(inode, type, acl);
-		}
-		kfree(value);
+	}
+	if (size > 0) {
+		acl = posix_acl_from_xattr(value, size);
 	} else if (size == -ENOENT || size == -ENODATA || size == 0) {
 		/* FIXME, who returns -ENOENT?  I think nobody */
 		acl = NULL;
-		set_cached_acl(inode, type, acl);
 	} else {
 		acl = ERR_PTR(-EIO);
 	}
+	kfree(value);
+
+	if (!IS_ERR(acl))
+		set_cached_acl(inode, type, acl);
 
 	return acl;
 }
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index d9f99a16edd6..5a5d325a3935 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -103,11 +103,6 @@ struct btrfs_inode {
 	 */
 	u64 delalloc_bytes;
 
-	/* total number of bytes that may be used for this inode for
-	 * delalloc
-	 */
-	u64 reserved_bytes;
-
 	/*
 	 * the size of the file stored in the metadata on disk.  data=ordered
 	 * means the in-memory i_size might be larger than the size on disk
@@ -115,9 +110,6 @@ struct btrfs_inode {
 	 */
 	u64 disk_i_size;
 
-	/* flags field from the on disk inode */
-	u32 flags;
-
 	/*
 	 * if this is a directory then index_cnt is the counter for the index
 	 * number for new files that are created
@@ -132,6 +124,15 @@ struct btrfs_inode {
 	u64 last_unlink_trans;
 
 	/*
+	 * Number of bytes outstanding that are going to need csums.  This is
+	 * used in ENOSPC accounting.
+	 */
+	u64 csum_bytes;
+
+	/* flags field from the on disk inode */
+	u32 flags;
+
+	/*
 	 * Counters to keep track of the number of extent item's we may use due
 	 * to delalloc and such.  outstanding_extents is the number of extent
 	 * items we think we'll end up using, and reserved_extents is the number
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 8ec5d86f1734..14f1c5a0b2d2 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -85,7 +85,8 @@ struct compressed_bio {
 static inline int compressed_bio_size(struct btrfs_root *root,
 				      unsigned long disk_size)
 {
-	u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);
+	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
+
 	return sizeof(struct compressed_bio) +
 		((disk_size + root->sectorsize - 1) / root->sectorsize) *
 		csum_size;
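
For intuition: compressed_bio_size() reserves one checksum slot per on-disk sector of the compressed extent, rounding up. A worked example of the arithmetic (not part of the patch), assuming a 4KiB sectorsize and the default 4-byte crc32c checksum:

	/* sketch of the allocation size above, with assumed typical values */
	unsigned long disk_size = 128 * 1024;	/* compressed extent size */
	unsigned long sectorsize = 4096;
	u16 csum_size = 4;			/* crc32c */

	unsigned long nsectors = (disk_size + sectorsize - 1) / sectorsize; /* 32 */
	/* total = sizeof(struct compressed_bio) + 32 * 4 = sizeof(...) + 128 bytes */

The change itself only adjusts the btrfs_super_csum_size() call for the new super_copy pointer (see the ctree.h hunk below where the embedded struct becomes a pointer).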
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 011cab3aca8d..0fe615e4ea38 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -902,9 +902,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 
 	orig_ptr = btrfs_node_blockptr(mid, orig_slot);
 
-	if (level < BTRFS_MAX_LEVEL - 1)
+	if (level < BTRFS_MAX_LEVEL - 1) {
 		parent = path->nodes[level + 1];
 		pslot = path->slots[level + 1];
+	}
 
 	/*
 	 * deal with the case where there is only one pointer in the root
@@ -1107,9 +1108,10 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
 	mid = path->nodes[level];
 	WARN_ON(btrfs_header_generation(mid) != trans->transid);
 
-	if (level < BTRFS_MAX_LEVEL - 1)
+	if (level < BTRFS_MAX_LEVEL - 1) {
 		parent = path->nodes[level + 1];
 		pslot = path->slots[level + 1];
+	}
 
 	if (!parent)
 		return 1;
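
Why the added braces matter: before this change the second assignment ran unconditionally despite its indentation. A sketch of the hazard, as I read the hunk (path->nodes[] and path->slots[] are BTRFS_MAX_LEVEL-sized arrays):

	/* old code: the guard only covered the first statement */
	if (level < BTRFS_MAX_LEVEL - 1)
		parent = path->nodes[level + 1];
		pslot = path->slots[level + 1];	/* always executed; indexes
						   slots[BTRFS_MAX_LEVEL] when
						   level == BTRFS_MAX_LEVEL - 1 */

The braces scope both assignments to the bounds check, in balance_level() and push_nodes_for_insert() alike.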
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 03912c5c6f49..b9ba59ff9292 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -30,6 +30,7 @@
 #include <linux/kobject.h>
 #include <trace/events/btrfs.h>
 #include <asm/kmap_types.h>
+#include <linux/pagemap.h>
 #include "extent_io.h"
 #include "extent_map.h"
 #include "async-thread.h"
@@ -360,6 +361,47 @@ struct btrfs_header {
 #define BTRFS_LABEL_SIZE 256
 
 /*
+ * just in case we somehow lose the roots and are not able to mount,
+ * we store an array of the roots from previous transactions
+ * in the super.
+ */
+#define BTRFS_NUM_BACKUP_ROOTS 4
+struct btrfs_root_backup {
+	__le64 tree_root;
+	__le64 tree_root_gen;
+
+	__le64 chunk_root;
+	__le64 chunk_root_gen;
+
+	__le64 extent_root;
+	__le64 extent_root_gen;
+
+	__le64 fs_root;
+	__le64 fs_root_gen;
+
+	__le64 dev_root;
+	__le64 dev_root_gen;
+
+	__le64 csum_root;
+	__le64 csum_root_gen;
+
+	__le64 total_bytes;
+	__le64 bytes_used;
+	__le64 num_devices;
+	/* future */
+	__le64 unsed_64[4];
+
+	u8 tree_root_level;
+	u8 chunk_root_level;
+	u8 extent_root_level;
+	u8 fs_root_level;
+	u8 dev_root_level;
+	u8 csum_root_level;
+	/* future and to align */
+	u8 unused_8[10];
+} __attribute__ ((__packed__));
+
+/*
 * the super block basically lists the main trees of the FS
 * it currently lacks any block count etc etc
 */
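
A quick size check on the new structure (my arithmetic, not stated in the patch):

	/*
	 * 12 x __le64 root/gen pairs            96 bytes
	 * total_bytes, bytes_used, num_devices  24 bytes
	 * unsed_64[4]                           32 bytes
	 * 6 x u8 levels                          6 bytes
	 * unused_8[10]                          10 bytes
	 * ----------------------------------------------
	 * sizeof(struct btrfs_root_backup)  =  168 bytes (packed)
	 */

So the four-slot array appended to btrfs_super_block below adds 4 * 168 = 672 bytes, which should still leave the on-disk super within its 4KiB area.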
@@ -405,6 +447,7 @@ struct btrfs_super_block {
 	/* future expansion */
 	__le64 reserved[31];
 	u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
+	struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
 } __attribute__ ((__packed__));
 
 /*
@@ -772,14 +815,8 @@ struct btrfs_space_info {
 struct btrfs_block_rsv {
 	u64 size;
 	u64 reserved;
-	u64 freed[2];
 	struct btrfs_space_info *space_info;
-	struct list_head list;
 	spinlock_t lock;
-	atomic_t usage;
-	unsigned int priority:8;
-	unsigned int durable:1;
-	unsigned int refill_used:1;
 	unsigned int full:1;
 };
 
@@ -840,10 +877,10 @@ struct btrfs_block_group_cache {
 	spinlock_t lock;
 	u64 pinned;
 	u64 reserved;
-	u64 reserved_pinned;
 	u64 bytes_super;
 	u64 flags;
 	u64 sectorsize;
+	u64 cache_generation;
 	unsigned int ro:1;
 	unsigned int dirty:1;
 	unsigned int iref:1;
@@ -899,6 +936,10 @@ struct btrfs_fs_info {
 	spinlock_t block_group_cache_lock;
 	struct rb_root block_group_cache_tree;
 
+	/* keep track of unallocated space */
+	spinlock_t free_chunk_lock;
+	u64 free_chunk_space;
+
 	struct extent_io_tree freed_extents[2];
 	struct extent_io_tree *pinned_extents;
 
@@ -916,14 +957,11 @@ struct btrfs_fs_info {
 	struct btrfs_block_rsv trans_block_rsv;
 	/* block reservation for chunk tree */
 	struct btrfs_block_rsv chunk_block_rsv;
+	/* block reservation for delayed operations */
+	struct btrfs_block_rsv delayed_block_rsv;
 
 	struct btrfs_block_rsv empty_block_rsv;
 
-	/* list of block reservations that cross multiple transactions */
-	struct list_head durable_block_rsv_list;
-
-	struct mutex durable_block_rsv_mutex;
-
 	u64 generation;
 	u64 last_trans_committed;
 
@@ -942,8 +980,8 @@ struct btrfs_fs_info {
 	wait_queue_head_t transaction_blocked_wait;
 	wait_queue_head_t async_submit_wait;
 
-	struct btrfs_super_block super_copy;
-	struct btrfs_super_block super_for_commit;
+	struct btrfs_super_block *super_copy;
+	struct btrfs_super_block *super_for_commit;
 	struct block_device *__bdev;
 	struct super_block *sb;
 	struct inode *btree_inode;
@@ -1036,6 +1074,7 @@ struct btrfs_fs_info {
 	struct btrfs_workers endio_freespace_worker;
 	struct btrfs_workers submit_workers;
 	struct btrfs_workers caching_workers;
+	struct btrfs_workers readahead_workers;
 
 	/*
 	 * fixup workers take dirty pages that didn't properly go through
@@ -1119,6 +1158,13 @@ struct btrfs_fs_info {
 	u64 fs_state;
 
 	struct btrfs_delayed_root *delayed_root;
+
+	/* readahead tree */
+	spinlock_t reada_lock;
+	struct radix_tree_root reada_tree;
+
+	/* next backup root to be overwritten */
+	int backup_root_index;
 };
 
 /*
@@ -1363,6 +1409,7 @@ struct btrfs_ioctl_defrag_range_args {
 #define BTRFS_MOUNT_ENOSPC_DEBUG	(1 << 15)
 #define BTRFS_MOUNT_AUTO_DEFRAG		(1 << 16)
 #define BTRFS_MOUNT_INODE_MAP_CACHE	(1 << 17)
+#define BTRFS_MOUNT_RECOVERY		(1 << 18)
 
 #define btrfs_clear_opt(o, opt)		((o) &= ~BTRFS_MOUNT_##opt)
 #define btrfs_set_opt(o, opt)		((o) |= BTRFS_MOUNT_##opt)
@@ -1978,6 +2025,55 @@ static inline bool btrfs_root_readonly(struct btrfs_root *root)
 	return root->root_item.flags & BTRFS_ROOT_SUBVOL_RDONLY;
 }
 
+/* struct btrfs_root_backup */
+BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup,
+		   tree_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_tree_root_gen, struct btrfs_root_backup,
+		   tree_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_tree_root_level, struct btrfs_root_backup,
+		   tree_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_chunk_root, struct btrfs_root_backup,
+		   chunk_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_gen, struct btrfs_root_backup,
+		   chunk_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_level, struct btrfs_root_backup,
+		   chunk_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_extent_root, struct btrfs_root_backup,
+		   extent_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_extent_root_gen, struct btrfs_root_backup,
+		   extent_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_extent_root_level, struct btrfs_root_backup,
+		   extent_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_fs_root, struct btrfs_root_backup,
+		   fs_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_fs_root_gen, struct btrfs_root_backup,
+		   fs_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_fs_root_level, struct btrfs_root_backup,
+		   fs_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_dev_root, struct btrfs_root_backup,
+		   dev_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_dev_root_gen, struct btrfs_root_backup,
+		   dev_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_dev_root_level, struct btrfs_root_backup,
+		   dev_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_csum_root, struct btrfs_root_backup,
+		   csum_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_csum_root_gen, struct btrfs_root_backup,
+		   csum_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_csum_root_level, struct btrfs_root_backup,
+		   csum_root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(backup_total_bytes, struct btrfs_root_backup,
+		   total_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup,
+		   bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup,
+		   num_devices, 64);
+
 /* struct btrfs_super_block */
 
 BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
@@ -2129,6 +2225,11 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
 	       (space_info->flags & BTRFS_BLOCK_GROUP_DATA));
 }
 
+static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
+{
+	return mapping_gfp_mask(mapping) & ~__GFP_FS;
+}
+
 /* extent-tree.c */
 static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
 						 unsigned num_items)
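
btrfs_alloc_write_mask() strips __GFP_FS so that page-cache allocations made in the write path cannot recurse back into filesystem reclaim while btrfs holds locks. A hypothetical caller (the grab_write_page name and shape are illustrative, not from this diff):

	static struct page *grab_write_page(struct address_space *mapping,
					    pgoff_t index)
	{
		gfp_t mask = btrfs_alloc_write_mask(mapping); /* gfp & ~__GFP_FS */

		return find_or_create_page(mapping, index, mask);
	}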
@@ -2137,6 +2238,17 @@ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
 		3 * num_items;
 }
 
+/*
+ * Doing a truncate won't result in new nodes or leaves, just what we need for
+ * COW.
+ */
+static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_root *root,
+						 unsigned num_items)
+{
+	return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
+		num_items;
+}
+
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root, unsigned long count);
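
A worked comparison of the two estimators (assuming 4KiB leaves and nodes, and BTRFS_MAX_LEVEL == 8):

	/*
	 * per-item path cost = leafsize + nodesize * (BTRFS_MAX_LEVEL - 1)
	 *                    = 4096 + 4096 * 7 = 32KiB
	 *
	 * btrfs_calc_trans_metadata_size() multiplies by 3, presumably to
	 * leave room for splits and balancing:  1 item -> 96KiB reserved.
	 * btrfs_calc_trunc_metadata_size() drops the factor, since truncate
	 * only COWs the existing path:          1 item -> 32KiB reserved.
	 */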
@@ -2146,6 +2258,9 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
 			     u64 num_bytes, u64 *refs, u64 *flags);
 int btrfs_pin_extent(struct btrfs_root *root,
 		     u64 bytenr, u64 num, int reserved);
+int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
+				    struct btrfs_root *root,
+				    u64 bytenr, u64 num_bytes);
 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root,
 			  u64 objectid, u64 offset, u64 bytenr);
@@ -2196,8 +2311,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
 		      u64 root_objectid, u64 owner, u64 offset);
 
 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
-int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
-				u64 num_bytes, int reserve, int sinfo);
+int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
+				       u64 start, u64 len);
 int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
 				struct btrfs_root *root);
 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
@@ -2240,25 +2355,23 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv);
 struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root);
 void btrfs_free_block_rsv(struct btrfs_root *root,
 			  struct btrfs_block_rsv *rsv);
-void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
-				 struct btrfs_block_rsv *rsv);
-int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
-			struct btrfs_root *root,
+int btrfs_block_rsv_add(struct btrfs_root *root,
 			struct btrfs_block_rsv *block_rsv,
 			u64 num_bytes);
-int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
-			  struct btrfs_root *root,
+int btrfs_block_rsv_add_noflush(struct btrfs_root *root,
+				struct btrfs_block_rsv *block_rsv,
+				u64 num_bytes);
+int btrfs_block_rsv_check(struct btrfs_root *root,
+			  struct btrfs_block_rsv *block_rsv, int min_factor);
+int btrfs_block_rsv_refill(struct btrfs_root *root,
 			  struct btrfs_block_rsv *block_rsv,
-			  u64 min_reserved, int min_factor);
+			  u64 min_reserved);
 int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
 			    struct btrfs_block_rsv *dst_rsv,
 			    u64 num_bytes);
 void btrfs_block_rsv_release(struct btrfs_root *root,
 			     struct btrfs_block_rsv *block_rsv,
 			     u64 num_bytes);
-int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
-				    struct btrfs_root *root,
-				    struct btrfs_block_rsv *rsv);
 int btrfs_set_block_group_ro(struct btrfs_root *root,
 			     struct btrfs_block_group_cache *cache);
 int btrfs_set_block_group_rw(struct btrfs_root *root,
@@ -2379,6 +2492,18 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
 	smp_mb();
 	return fs_info->closing;
 }
+static inline void free_fs_info(struct btrfs_fs_info *fs_info)
+{
+	kfree(fs_info->delayed_root);
+	kfree(fs_info->extent_root);
+	kfree(fs_info->tree_root);
+	kfree(fs_info->chunk_root);
+	kfree(fs_info->dev_root);
+	kfree(fs_info->csum_root);
+	kfree(fs_info->super_copy);
+	kfree(fs_info->super_for_commit);
+	kfree(fs_info);
+}
 
 /* root-item.c */
 int btrfs_find_root_ref(struct btrfs_root *tree_root,
@@ -2579,11 +2704,6 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans,
 int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
 int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
 int btrfs_orphan_cleanup(struct btrfs_root *root);
-void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans,
-				struct btrfs_pending_snapshot *pending,
-				u64 *bytes_to_reserve);
-void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
-				struct btrfs_pending_snapshot *pending);
 void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root);
 int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
@@ -2697,4 +2817,20 @@ int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid);
 int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
 			 struct btrfs_scrub_progress *progress);
 
+/* reada.c */
+struct reada_control {
+	struct btrfs_root	*root;		/* tree to prefetch */
+	struct btrfs_key	key_start;
+	struct btrfs_key	key_end;	/* exclusive */
+	atomic_t		elems;
+	struct kref		refcnt;
+	wait_queue_head_t	wait;
+};
+struct reada_control *btrfs_reada_add(struct btrfs_root *root,
+			      struct btrfs_key *start, struct btrfs_key *end);
+int btrfs_reada_wait(void *handle);
+void btrfs_reada_detach(void *handle);
+int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
+			 u64 start, int err);
+
 #endif
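
A hypothetical user of the reada API declared above, prefetching a whole tree in the background and then waiting for completion (sketch only; the key bounds are chosen to span every possible key):

	static void prefetch_tree(struct btrfs_root *root)
	{
		struct btrfs_key start = { .objectid = 0, .type = 0, .offset = 0 };
		struct btrfs_key end = { .objectid = (u64)-1, .type = (u8)-1,
					 .offset = (u64)-1 };
		struct reada_control *rc;

		rc = btrfs_reada_add(root, &start, &end);
		if (!IS_ERR(rc))
			btrfs_reada_wait(rc);	/* waits until elems drops to 0 */
	}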
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index b52c672f4c18..bbe8496d5339 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -591,7 +591,7 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
 		return 0;
 
 	src_rsv = trans->block_rsv;
-	dst_rsv = &root->fs_info->global_block_rsv;
+	dst_rsv = &root->fs_info->delayed_block_rsv;
 
 	num_bytes = btrfs_calc_trans_metadata_size(root, 1);
 	ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
@@ -609,7 +609,7 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
 	if (!item->bytes_reserved)
 		return;
 
-	rsv = &root->fs_info->global_block_rsv;
+	rsv = &root->fs_info->delayed_block_rsv;
 	btrfs_block_rsv_release(root, rsv,
 				item->bytes_reserved);
 }
@@ -624,13 +624,36 @@ static int btrfs_delayed_inode_reserve_metadata(
 	u64 num_bytes;
 	int ret;
 
-	if (!trans->bytes_reserved)
-		return 0;
-
 	src_rsv = trans->block_rsv;
-	dst_rsv = &root->fs_info->global_block_rsv;
+	dst_rsv = &root->fs_info->delayed_block_rsv;
 
 	num_bytes = btrfs_calc_trans_metadata_size(root, 1);
+
+	/*
+	 * btrfs_dirty_inode will update the inode under btrfs_join_transaction
+	 * which doesn't reserve space for speed.  This is a problem since we
+	 * still need to reserve space for this update, so try to reserve the
+	 * space.
+	 *
+	 * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
+	 * we're accounted for.
+	 */
+	if (!trans->bytes_reserved &&
+	    src_rsv != &root->fs_info->delalloc_block_rsv) {
+		ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
+		/*
+		 * Since we're under a transaction reserve_metadata_bytes could
+		 * try to commit the transaction which will make it return
+		 * EAGAIN to make us stop the transaction we have, so return
+		 * ENOSPC instead so that btrfs_dirty_inode knows what to do.
+		 */
+		if (ret == -EAGAIN)
+			ret = -ENOSPC;
+		if (!ret)
+			node->bytes_reserved = num_bytes;
+		return ret;
+	}
+
 	ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
 	if (!ret)
 		node->bytes_reserved = num_bytes;
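
The -EAGAIN to -ENOSPC translation above only makes sense together with the caller's fallback. A paraphrased sketch of what btrfs_dirty_inode() is expected to do with it (the caller is not part of this hunk):

	trans = btrfs_join_transaction(root);	/* fast path: nothing reserved */
	ret = btrfs_update_inode(trans, root, inode);
	if (ret == -ENOSPC) {
		/* retry with a real one-item reservation */
		btrfs_end_transaction(trans, root);
		trans = btrfs_start_transaction(root, 1);
		if (!IS_ERR(trans))
			ret = btrfs_update_inode(trans, root, inode);
	}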
@@ -646,7 +669,7 @@ static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root,
 	if (!node->bytes_reserved)
 		return;
 
-	rsv = &root->fs_info->global_block_rsv;
+	rsv = &root->fs_info->delayed_block_rsv;
 	btrfs_block_rsv_release(root, rsv,
 				node->bytes_reserved);
 	node->bytes_reserved = 0;
@@ -1026,7 +1049,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
 	path->leave_spinning = 1;
 
 	block_rsv = trans->block_rsv;
-	trans->block_rsv = &root->fs_info->global_block_rsv;
+	trans->block_rsv = &root->fs_info->delayed_block_rsv;
 
 	delayed_root = btrfs_get_delayed_root(root);
 
@@ -1069,7 +1092,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
 	path->leave_spinning = 1;
 
 	block_rsv = trans->block_rsv;
-	trans->block_rsv = &node->root->fs_info->global_block_rsv;
+	trans->block_rsv = &node->root->fs_info->delayed_block_rsv;
 
 	ret = btrfs_insert_delayed_items(trans, path, node->root, node);
 	if (!ret)
@@ -1149,7 +1172,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
 		goto free_path;
 
 	block_rsv = trans->block_rsv;
-	trans->block_rsv = &root->fs_info->global_block_rsv;
+	trans->block_rsv = &root->fs_info->delayed_block_rsv;
 
 	ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
 	if (!ret)
@@ -1686,11 +1709,8 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
 	}
 
 	ret = btrfs_delayed_inode_reserve_metadata(trans, root, delayed_node);
-	/*
-	 * we must reserve enough space when we start a new transaction,
-	 * so reserving metadata failure is impossible
-	 */
-	BUG_ON(ret);
+	if (ret)
+		goto release_node;
 
 	fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
 	delayed_node->inode_dirty = 1;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index dc0343802535..0eb1f0951251 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -256,8 +256,7 @@ void btrfs_csum_final(u32 crc, char *result)
 static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
 			   int verify)
 {
-	u16 csum_size =
-		btrfs_super_csum_size(&root->fs_info->super_copy);
+	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
 	char *result = NULL;
 	unsigned long len;
 	unsigned long cur_len;
@@ -367,7 +366,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
 	clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
 	io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
 	while (1) {
-		ret = read_extent_buffer_pages(io_tree, eb, start, 1,
+		ret = read_extent_buffer_pages(io_tree, eb, start,
+					       WAIT_COMPLETE,
 					       btree_get_extent, mirror_num);
 		if (!ret &&
 		    !verify_parent_transid(io_tree, eb, parent_transid))
@@ -608,11 +608,47 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 	end = min_t(u64, eb->len, PAGE_CACHE_SIZE);
 	end = eb->start + end - 1;
 err:
+	if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) {
+		clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags);
+		btree_readahead_hook(root, eb, eb->start, ret);
+	}
+
 	free_extent_buffer(eb);
 out:
 	return ret;
 }
 
+static int btree_io_failed_hook(struct bio *failed_bio,
+			 struct page *page, u64 start, u64 end,
+			 u64 mirror_num, struct extent_state *state)
+{
+	struct extent_io_tree *tree;
+	unsigned long len;
+	struct extent_buffer *eb;
+	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
+
+	tree = &BTRFS_I(page->mapping->host)->io_tree;
+	if (page->private == EXTENT_PAGE_PRIVATE)
+		goto out;
+	if (!page->private)
+		goto out;
+
+	len = page->private >> 2;
+	WARN_ON(len == 0);
+
+	eb = alloc_extent_buffer(tree, start, len, page);
+	if (eb == NULL)
+		goto out;
+
+	if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) {
+		clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags);
+		btree_readahead_hook(root, eb, eb->start, -EIO);
+	}
+
+out:
+	return -EIO;	/* we fixed nothing */
+}
+
 static void end_workqueue_bio(struct bio *bio, int err)
 {
 	struct end_io_wq *end_io_wq = bio->bi_private;
@@ -974,11 +1010,43 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
 	if (!buf)
 		return 0;
 	read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
-				 buf, 0, 0, btree_get_extent, 0);
+				 buf, 0, WAIT_NONE, btree_get_extent, 0);
 	free_extent_buffer(buf);
 	return ret;
 }
 
+int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
+			 int mirror_num, struct extent_buffer **eb)
+{
+	struct extent_buffer *buf = NULL;
+	struct inode *btree_inode = root->fs_info->btree_inode;
+	struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree;
+	int ret;
+
+	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
+	if (!buf)
+		return 0;
+
+	set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags);
+
+	ret = read_extent_buffer_pages(io_tree, buf, 0, WAIT_PAGE_LOCK,
+				       btree_get_extent, mirror_num);
+	if (ret) {
+		free_extent_buffer(buf);
+		return ret;
+	}
+
+	if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) {
+		free_extent_buffer(buf);
+		return -EIO;
+	} else if (extent_buffer_uptodate(io_tree, buf, NULL)) {
+		*eb = buf;
+	} else {
+		free_extent_buffer(buf);
+	}
+	return 0;
+}
+
 struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
 					    u64 bytenr, u32 blocksize)
 {
@@ -1135,10 +1203,12 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
 
 	generation = btrfs_root_generation(&root->root_item);
 	blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
+	root->commit_root = NULL;
 	root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
 				     blocksize, generation);
 	if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) {
 		free_extent_buffer(root->node);
+		root->node = NULL;
 		return -EIO;
 	}
 	root->commit_root = btrfs_root_node(root);
@@ -1577,6 +1647,228 @@ sleep:
 	return 0;
 }
 
+/*
+ * this will find the highest generation in the array of
+ * root backups.  The index of the highest array is returned,
+ * or -1 if we can't find anything.
+ *
+ * We check to make sure the array is valid by comparing the
+ * generation of the latest root in the array with the generation
+ * in the super block.  If they don't match we pitch it.
+ */
+static int find_newest_super_backup(struct btrfs_fs_info *info, u64 newest_gen)
+{
+	u64 cur;
+	int newest_index = -1;
+	struct btrfs_root_backup *root_backup;
+	int i;
+
+	for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
+		root_backup = info->super_copy->super_roots + i;
+		cur = btrfs_backup_tree_root_gen(root_backup);
+		if (cur == newest_gen)
+			newest_index = i;
+	}
+
+	/* check to see if we actually wrapped around */
+	if (newest_index == BTRFS_NUM_BACKUP_ROOTS - 1) {
+		root_backup = info->super_copy->super_roots;
+		cur = btrfs_backup_tree_root_gen(root_backup);
+		if (cur == newest_gen)
+			newest_index = 0;
+	}
+	return newest_index;
+}
+
+
+/*
+ * find the oldest backup so we know where to store new entries
+ * in the backup array.  This will set the backup_root_index
+ * field in the fs_info struct
+ */
+static void find_oldest_super_backup(struct btrfs_fs_info *info,
+				     u64 newest_gen)
+{
+	int newest_index = -1;
+
+	newest_index = find_newest_super_backup(info, newest_gen);
+	/* if there was garbage in there, just move along */
+	if (newest_index == -1) {
+		info->backup_root_index = 0;
+	} else {
+		info->backup_root_index = (newest_index + 1) % BTRFS_NUM_BACKUP_ROOTS;
+	}
+}
+
+/*
+ * copy all the root pointers into the super backup array.
+ * this will bump the backup pointer by one when it is
+ * done
+ */
+static void backup_super_roots(struct btrfs_fs_info *info)
+{
+	int next_backup;
+	struct btrfs_root_backup *root_backup;
+	int last_backup;
+
+	next_backup = info->backup_root_index;
+	last_backup = (next_backup + BTRFS_NUM_BACKUP_ROOTS - 1) %
+		BTRFS_NUM_BACKUP_ROOTS;
+
+	/*
+	 * just overwrite the last backup if we're at the same generation
+	 * this happens only at umount
+	 */
+	root_backup = info->super_for_commit->super_roots + last_backup;
+	if (btrfs_backup_tree_root_gen(root_backup) ==
+	    btrfs_header_generation(info->tree_root->node))
+		next_backup = last_backup;
+
+	root_backup = info->super_for_commit->super_roots + next_backup;
+
+	/*
+	 * make sure all of our padding and empty slots get zero filled
+	 * regardless of which ones we use today
+	 */
+	memset(root_backup, 0, sizeof(*root_backup));
+
+	info->backup_root_index = (next_backup + 1) % BTRFS_NUM_BACKUP_ROOTS;
+
+	btrfs_set_backup_tree_root(root_backup, info->tree_root->node->start);
+	btrfs_set_backup_tree_root_gen(root_backup,
+			       btrfs_header_generation(info->tree_root->node));
+
+	btrfs_set_backup_tree_root_level(root_backup,
+			       btrfs_header_level(info->tree_root->node));
+
+	btrfs_set_backup_chunk_root(root_backup, info->chunk_root->node->start);
+	btrfs_set_backup_chunk_root_gen(root_backup,
+			       btrfs_header_generation(info->chunk_root->node));
+	btrfs_set_backup_chunk_root_level(root_backup,
+			       btrfs_header_level(info->chunk_root->node));
+
+	btrfs_set_backup_extent_root(root_backup, info->extent_root->node->start);
+	btrfs_set_backup_extent_root_gen(root_backup,
+			       btrfs_header_generation(info->extent_root->node));
+	btrfs_set_backup_extent_root_level(root_backup,
+			       btrfs_header_level(info->extent_root->node));
+
+	btrfs_set_backup_fs_root(root_backup, info->fs_root->node->start);
+	btrfs_set_backup_fs_root_gen(root_backup,
+			       btrfs_header_generation(info->fs_root->node));
+	btrfs_set_backup_fs_root_level(root_backup,
+			       btrfs_header_level(info->fs_root->node));
+
+	btrfs_set_backup_dev_root(root_backup, info->dev_root->node->start);
+	btrfs_set_backup_dev_root_gen(root_backup,
+			       btrfs_header_generation(info->dev_root->node));
+	btrfs_set_backup_dev_root_level(root_backup,
+			       btrfs_header_level(info->dev_root->node));
+
+	btrfs_set_backup_csum_root(root_backup, info->csum_root->node->start);
+	btrfs_set_backup_csum_root_gen(root_backup,
+			       btrfs_header_generation(info->csum_root->node));
+	btrfs_set_backup_csum_root_level(root_backup,
+			       btrfs_header_level(info->csum_root->node));
+
+	btrfs_set_backup_total_bytes(root_backup,
+			     btrfs_super_total_bytes(info->super_copy));
+	btrfs_set_backup_bytes_used(root_backup,
+			     btrfs_super_bytes_used(info->super_copy));
+	btrfs_set_backup_num_devices(root_backup,
+			     btrfs_super_num_devices(info->super_copy));
+
+	/*
+	 * if we don't copy this out to the super_copy, it won't get remembered
+	 * for the next commit
+	 */
+	memcpy(&info->super_copy->super_roots,
+	       &info->super_for_commit->super_roots,
+	       sizeof(*root_backup) * BTRFS_NUM_BACKUP_ROOTS);
+}
+
+/*
+ * this copies info out of the root backup array and back into
+ * the in-memory super block.  It is meant to help iterate through
+ * the array, so you send it the number of backups you've already
+ * tried and the last backup index you used.
+ *
+ * this returns -1 when it has tried all the backups
+ */
+static noinline int next_root_backup(struct btrfs_fs_info *info,
+				     struct btrfs_super_block *super,
+				     int *num_backups_tried, int *backup_index)
+{
+	struct btrfs_root_backup *root_backup;
+	int newest = *backup_index;
+
+	if (*num_backups_tried == 0) {
+		u64 gen = btrfs_super_generation(super);
+
+		newest = find_newest_super_backup(info, gen);
+		if (newest == -1)
+			return -1;
+
+		*backup_index = newest;
+		*num_backups_tried = 1;
+	} else if (*num_backups_tried == BTRFS_NUM_BACKUP_ROOTS) {
+		/* we've tried all the backups, all done */
+		return -1;
+	} else {
+		/* jump to the next oldest backup */
+		newest = (*backup_index + BTRFS_NUM_BACKUP_ROOTS - 1) %
+			BTRFS_NUM_BACKUP_ROOTS;
+		*backup_index = newest;
+		*num_backups_tried += 1;
+	}
+	root_backup = super->super_roots + newest;
+
+	btrfs_set_super_generation(super,
+				   btrfs_backup_tree_root_gen(root_backup));
+	btrfs_set_super_root(super, btrfs_backup_tree_root(root_backup));
+	btrfs_set_super_root_level(super,
+				   btrfs_backup_tree_root_level(root_backup));
+	btrfs_set_super_bytes_used(super, btrfs_backup_bytes_used(root_backup));
+
+	/*
+	 * fixme: the total bytes and num_devices need to match or we should
+	 * need a fsck
+	 */
+	btrfs_set_super_total_bytes(super, btrfs_backup_total_bytes(root_backup));
+	btrfs_set_super_num_devices(super, btrfs_backup_num_devices(root_backup));
+	return 0;
+}
+
+/* helper to cleanup tree roots */
+static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
+{
+	free_extent_buffer(info->tree_root->node);
+	free_extent_buffer(info->tree_root->commit_root);
+	free_extent_buffer(info->dev_root->node);
+	free_extent_buffer(info->dev_root->commit_root);
+	free_extent_buffer(info->extent_root->node);
+	free_extent_buffer(info->extent_root->commit_root);
+	free_extent_buffer(info->csum_root->node);
+	free_extent_buffer(info->csum_root->commit_root);
+
+	info->tree_root->node = NULL;
+	info->tree_root->commit_root = NULL;
+	info->dev_root->node = NULL;
+	info->dev_root->commit_root = NULL;
+	info->extent_root->node = NULL;
+	info->extent_root->commit_root = NULL;
+	info->csum_root->node = NULL;
+	info->csum_root->commit_root = NULL;
+
+	if (chunk_root) {
+		free_extent_buffer(info->chunk_root->node);
+		free_extent_buffer(info->chunk_root->commit_root);
+		info->chunk_root->node = NULL;
+		info->chunk_root->commit_root = NULL;
+	}
+}
+
 struct btrfs_root *open_ctree(struct super_block *sb,
 			      struct btrfs_fs_devices *fs_devices,
 			      char *options)
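
The backup slots behave as a ring buffer; the helpers above walk it with modular arithmetic. A worked example (BTRFS_NUM_BACKUP_ROOTS == 4, newest backup found in slot 2):

	/*
	 * find_oldest_super_backup: slot to overwrite next
	 *     (2 + 1) % 4 = 3
	 * next_root_backup: recovery retries newest-first, stepping back
	 *     start at 2, then (2 + 4 - 1) % 4 = 1, then 0, then 3
	 * so a failing mount tries generations in order 2, 1, 0, 3 before
	 * giving up after BTRFS_NUM_BACKUP_ROOTS attempts.
	 */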
@@ -1604,6 +1896,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
 	int ret;
 	int err = -EINVAL;
+	int num_backups_tried = 0;
+	int backup_index = 0;
 
 	struct btrfs_super_block *disk_super;
 
@@ -1648,6 +1942,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	spin_lock_init(&fs_info->fs_roots_radix_lock);
 	spin_lock_init(&fs_info->delayed_iput_lock);
 	spin_lock_init(&fs_info->defrag_inodes_lock);
+	spin_lock_init(&fs_info->free_chunk_lock);
 	mutex_init(&fs_info->reloc_mutex);
 
 	init_completion(&fs_info->kobj_unregister);
@@ -1665,8 +1960,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	btrfs_init_block_rsv(&fs_info->trans_block_rsv);
 	btrfs_init_block_rsv(&fs_info->chunk_block_rsv);
 	btrfs_init_block_rsv(&fs_info->empty_block_rsv);
-	INIT_LIST_HEAD(&fs_info->durable_block_rsv_list);
-	mutex_init(&fs_info->durable_block_rsv_mutex);
+	btrfs_init_block_rsv(&fs_info->delayed_block_rsv);
 	atomic_set(&fs_info->nr_async_submits, 0);
 	atomic_set(&fs_info->async_delalloc_pages, 0);
 	atomic_set(&fs_info->async_submit_draining, 0);
@@ -1677,6 +1971,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	fs_info->metadata_ratio = 0;
 	fs_info->defrag_inodes = RB_ROOT;
 	fs_info->trans_no_join = 0;
+	fs_info->free_chunk_space = 0;
+
+	/* readahead state */
+	INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
+	spin_lock_init(&fs_info->reada_lock);
 
 	fs_info->thread_pool_size = min_t(unsigned long,
 					  num_online_cpus() + 2, 8);
@@ -1766,14 +2065,14 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 		goto fail_alloc;
 	}
 
-	memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy));
-	memcpy(&fs_info->super_for_commit, &fs_info->super_copy,
-	       sizeof(fs_info->super_for_commit));
+	memcpy(fs_info->super_copy, bh->b_data, sizeof(*fs_info->super_copy));
+	memcpy(fs_info->super_for_commit, fs_info->super_copy,
+	       sizeof(*fs_info->super_for_commit));
 	brelse(bh);
 
-	memcpy(fs_info->fsid, fs_info->super_copy.fsid, BTRFS_FSID_SIZE);
+	memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE);
 
-	disk_super = &fs_info->super_copy;
+	disk_super = fs_info->super_copy;
 	if (!btrfs_super_root(disk_super))
 		goto fail_alloc;
 
@@ -1783,6 +2082,13 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
 
 	/*
+	 * run through our array of backup supers and setup
+	 * our ring pointer to the oldest one
+	 */
+	generation = btrfs_super_generation(disk_super);
+	find_oldest_super_backup(fs_info, generation);
+
+	/*
 	 * In the long term, we'll store the compression type in the super
 	 * block, and it'll be used for per file compression control.
 	 */
@@ -1870,6 +2176,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta",
 			   fs_info->thread_pool_size,
 			   &fs_info->generic_worker);
+	btrfs_init_workers(&fs_info->readahead_workers, "readahead",
+			   fs_info->thread_pool_size,
+			   &fs_info->generic_worker);
 
 	/*
 	 * endios are largely parallel and should have a very
@@ -1880,6 +2189,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
 	fs_info->endio_write_workers.idle_thresh = 2;
 	fs_info->endio_meta_write_workers.idle_thresh = 2;
+	fs_info->readahead_workers.idle_thresh = 2;
 
 	btrfs_start_workers(&fs_info->workers, 1);
 	btrfs_start_workers(&fs_info->generic_worker, 1);
@@ -1893,6 +2203,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
 	btrfs_start_workers(&fs_info->delayed_workers, 1);
 	btrfs_start_workers(&fs_info->caching_workers, 1);
+	btrfs_start_workers(&fs_info->readahead_workers, 1);
 
 	fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
 	fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -1939,7 +2250,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
 		printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n",
 		       sb->s_id);
-		goto fail_chunk_root;
+		goto fail_tree_roots;
 	}
 	btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
 	chunk_root->commit_root = btrfs_root_node(chunk_root);
@@ -1954,11 +2265,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	if (ret) {
 		printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n",
 		       sb->s_id);
-		goto fail_chunk_root;
+		goto fail_tree_roots;
 	}
 
 	btrfs_close_extra_devices(fs_devices);
 
+retry_root_backup:
 	blocksize = btrfs_level_size(tree_root,
 				     btrfs_super_root_level(disk_super));
 	generation = btrfs_super_generation(disk_super);
@@ -1966,32 +2278,33 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	tree_root->node = read_tree_block(tree_root,
 					  btrfs_super_root(disk_super),
 					  blocksize, generation);
-	if (!tree_root->node)
-		goto fail_chunk_root;
-	if (!test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
+	if (!tree_root->node ||
+	    !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
 		printk(KERN_WARNING "btrfs: failed to read tree root on %s\n",
 		       sb->s_id);
-		goto fail_tree_root;
+
+		goto recovery_tree_root;
 	}
+
 	btrfs_set_root_node(&tree_root->root_item, tree_root->node);
 	tree_root->commit_root = btrfs_root_node(tree_root);
 
 	ret = find_and_setup_root(tree_root, fs_info,
 				  BTRFS_EXTENT_TREE_OBJECTID, extent_root);
 	if (ret)
-		goto fail_tree_root;
+		goto recovery_tree_root;
 	extent_root->track_dirty = 1;
 
 	ret = find_and_setup_root(tree_root, fs_info,
 				  BTRFS_DEV_TREE_OBJECTID, dev_root);
 	if (ret)
-		goto fail_extent_root;
+		goto recovery_tree_root;
 	dev_root->track_dirty = 1;
 
 	ret = find_and_setup_root(tree_root, fs_info,
 				  BTRFS_CSUM_TREE_OBJECTID, csum_root);
 	if (ret)
-		goto fail_dev_root;
+		goto recovery_tree_root;
 
 	csum_root->track_dirty = 1;
1997 | 2310 | ||
@@ -2124,20 +2437,10 @@ fail_cleaner: | |||
2124 | 2437 | ||
2125 | fail_block_groups: | 2438 | fail_block_groups: |
2126 | btrfs_free_block_groups(fs_info); | 2439 | btrfs_free_block_groups(fs_info); |
2127 | free_extent_buffer(csum_root->node); | 2440 | |
2128 | free_extent_buffer(csum_root->commit_root); | 2441 | fail_tree_roots: |
2129 | fail_dev_root: | 2442 | free_root_pointers(fs_info, 1); |
2130 | free_extent_buffer(dev_root->node); | 2443 | |
2131 | free_extent_buffer(dev_root->commit_root); | ||
2132 | fail_extent_root: | ||
2133 | free_extent_buffer(extent_root->node); | ||
2134 | free_extent_buffer(extent_root->commit_root); | ||
2135 | fail_tree_root: | ||
2136 | free_extent_buffer(tree_root->node); | ||
2137 | free_extent_buffer(tree_root->commit_root); | ||
2138 | fail_chunk_root: | ||
2139 | free_extent_buffer(chunk_root->node); | ||
2140 | free_extent_buffer(chunk_root->commit_root); | ||
2141 | fail_sb_buffer: | 2444 | fail_sb_buffer: |
2142 | btrfs_stop_workers(&fs_info->generic_worker); | 2445 | btrfs_stop_workers(&fs_info->generic_worker); |
2143 | btrfs_stop_workers(&fs_info->fixup_workers); | 2446 | btrfs_stop_workers(&fs_info->fixup_workers); |
@@ -2152,7 +2455,6 @@ fail_sb_buffer: | |||
2152 | btrfs_stop_workers(&fs_info->delayed_workers); | 2455 | btrfs_stop_workers(&fs_info->delayed_workers); |
2153 | btrfs_stop_workers(&fs_info->caching_workers); | 2456 | btrfs_stop_workers(&fs_info->caching_workers); |
2154 | fail_alloc: | 2457 | fail_alloc: |
2155 | kfree(fs_info->delayed_root); | ||
2156 | fail_iput: | 2458 | fail_iput: |
2157 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | 2459 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); |
2158 | iput(fs_info->btree_inode); | 2460 | iput(fs_info->btree_inode); |
@@ -2164,13 +2466,27 @@ fail_bdi: | |||
2164 | fail_srcu: | 2466 | fail_srcu: |
2165 | cleanup_srcu_struct(&fs_info->subvol_srcu); | 2467 | cleanup_srcu_struct(&fs_info->subvol_srcu); |
2166 | fail: | 2468 | fail: |
2167 | kfree(extent_root); | 2469 | free_fs_info(fs_info); |
2168 | kfree(tree_root); | ||
2169 | kfree(fs_info); | ||
2170 | kfree(chunk_root); | ||
2171 | kfree(dev_root); | ||
2172 | kfree(csum_root); | ||
2173 | return ERR_PTR(err); | 2470 | return ERR_PTR(err); |
2471 | |||
2472 | recovery_tree_root: | ||
2473 | |||
2474 | if (!btrfs_test_opt(tree_root, RECOVERY)) | ||
2475 | goto fail_tree_roots; | ||
2476 | |||
2477 | free_root_pointers(fs_info, 0); | ||
2478 | |||
2479 | /* don't use the log in recovery mode, it won't be valid */ | ||
2480 | btrfs_set_super_log_root(disk_super, 0); | ||
2481 | |||
2482 | /* we can't trust the free space cache either */ | ||
2483 | btrfs_set_opt(fs_info->mount_opt, CLEAR_CACHE); | ||
2484 | |||
2485 | ret = next_root_backup(fs_info, fs_info->super_copy, | ||
2486 | &num_backups_tried, &backup_index); | ||
2487 | if (ret == -1) | ||
2488 | goto fail_block_groups; | ||
2489 | goto retry_root_backup; | ||
2174 | } | 2490 | } |
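The recovery path added above reduces to the retry loop modeled below, a control-flow sketch only (read_tree_roots() and the commented-out kernel calls are stand-ins, and the "third copy is good" stub exists just to make the model runnable): with -o recovery, each failed root read disables log replay, invalidates the free space cache, and steps to the next backup super until the ring is exhausted.

#include <stdio.h>

#define EIO 5
#define NUM_BACKUPS 4

static int read_tree_roots(int attempt)
{
	return attempt < 2 ? -1 : 0;	/* pretend the third copy is good */
}

static int next_root_backup(int *tried, int *index)
{
	if (++(*tried) > NUM_BACKUPS)
		return -1;		/* every backup already tried */
	*index = (*index + 1) % NUM_BACKUPS;
	return 0;
}

static int open_ctree_model(int recovery)
{
	int tried = 0, index = 0, attempt = 0;

	while (read_tree_roots(attempt++)) {
		if (!recovery)
			return -EIO;	/* fail_tree_roots */
		/* in the kernel: free_root_pointers(fs_info, 0),
		 * zero the super's log root (it is stale), and set
		 * CLEAR_CACHE (the space cache is untrusted) */
		if (next_root_backup(&tried, &index) == -1)
			return -EIO;	/* out of backups */
	}
	return 0;			/* retry succeeded */
}

int main(void)
{
	printf("mount with -o recovery: %d\n", open_ctree_model(1));
	return 0;
}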
2175 | 2491 | ||
2176 | static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) | 2492 | static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) |
@@ -2338,10 +2654,11 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) | |||
2338 | int total_errors = 0; | 2654 | int total_errors = 0; |
2339 | u64 flags; | 2655 | u64 flags; |
2340 | 2656 | ||
2341 | max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; | 2657 | max_errors = btrfs_super_num_devices(root->fs_info->super_copy) - 1; |
2342 | do_barriers = !btrfs_test_opt(root, NOBARRIER); | 2658 | do_barriers = !btrfs_test_opt(root, NOBARRIER); |
2659 | backup_super_roots(root->fs_info); | ||
2343 | 2660 | ||
2344 | sb = &root->fs_info->super_for_commit; | 2661 | sb = root->fs_info->super_for_commit; |
2345 | dev_item = &sb->dev_item; | 2662 | dev_item = &sb->dev_item; |
2346 | 2663 | ||
2347 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 2664 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); |
@@ -2545,8 +2862,6 @@ int close_ctree(struct btrfs_root *root) | |||
2545 | /* clear out the rbtree of defraggable inodes */ | 2862 | /* clear out the rbtree of defraggable inodes */ |
2546 | btrfs_run_defrag_inodes(root->fs_info); | 2863 | btrfs_run_defrag_inodes(root->fs_info); |
2547 | 2864 | ||
2548 | btrfs_put_block_group_cache(fs_info); | ||
2549 | |||
2550 | /* | 2865 | /* |
2551 | * There are two situations where btrfs can be forced to flip read-only: | 2866 | * There are two situations where btrfs can be forced to flip read-only:
2552 | * | 2867 | * |
@@ -2572,6 +2887,8 @@ int close_ctree(struct btrfs_root *root) | |||
2572 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | 2887 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); |
2573 | } | 2888 | } |
2574 | 2889 | ||
2890 | btrfs_put_block_group_cache(fs_info); | ||
2891 | |||
2575 | kthread_stop(root->fs_info->transaction_kthread); | 2892 | kthread_stop(root->fs_info->transaction_kthread); |
2576 | kthread_stop(root->fs_info->cleaner_kthread); | 2893 | kthread_stop(root->fs_info->cleaner_kthread); |
2577 | 2894 | ||
@@ -2603,7 +2920,6 @@ int close_ctree(struct btrfs_root *root) | |||
2603 | del_fs_roots(fs_info); | 2920 | del_fs_roots(fs_info); |
2604 | 2921 | ||
2605 | iput(fs_info->btree_inode); | 2922 | iput(fs_info->btree_inode); |
2606 | kfree(fs_info->delayed_root); | ||
2607 | 2923 | ||
2608 | btrfs_stop_workers(&fs_info->generic_worker); | 2924 | btrfs_stop_workers(&fs_info->generic_worker); |
2609 | btrfs_stop_workers(&fs_info->fixup_workers); | 2925 | btrfs_stop_workers(&fs_info->fixup_workers); |
@@ -2617,6 +2933,7 @@ int close_ctree(struct btrfs_root *root) | |||
2617 | btrfs_stop_workers(&fs_info->submit_workers); | 2933 | btrfs_stop_workers(&fs_info->submit_workers); |
2618 | btrfs_stop_workers(&fs_info->delayed_workers); | 2934 | btrfs_stop_workers(&fs_info->delayed_workers); |
2619 | btrfs_stop_workers(&fs_info->caching_workers); | 2935 | btrfs_stop_workers(&fs_info->caching_workers); |
2936 | btrfs_stop_workers(&fs_info->readahead_workers); | ||
2620 | 2937 | ||
2621 | btrfs_close_devices(fs_info->fs_devices); | 2938 | btrfs_close_devices(fs_info->fs_devices); |
2622 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2939 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
@@ -2624,12 +2941,7 @@ int close_ctree(struct btrfs_root *root) | |||
2624 | bdi_destroy(&fs_info->bdi); | 2941 | bdi_destroy(&fs_info->bdi); |
2625 | cleanup_srcu_struct(&fs_info->subvol_srcu); | 2942 | cleanup_srcu_struct(&fs_info->subvol_srcu); |
2626 | 2943 | ||
2627 | kfree(fs_info->extent_root); | 2944 | free_fs_info(fs_info); |
2628 | kfree(fs_info->tree_root); | ||
2629 | kfree(fs_info->chunk_root); | ||
2630 | kfree(fs_info->dev_root); | ||
2631 | kfree(fs_info->csum_root); | ||
2632 | kfree(fs_info); | ||
2633 | 2945 | ||
2634 | return 0; | 2946 | return 0; |
2635 | } | 2947 | } |
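Both teardown paths now funnel into a single free_fs_info() helper. A hedged sketch of what that helper must cover, reconstructed from the kfree() chains these hunks delete (the super_copy and super_for_commit lines are an assumption, implied by those fields becoming pointers elsewhere in this series):

static void free_fs_info(struct btrfs_fs_info *fs_info)
{
	kfree(fs_info->extent_root);
	kfree(fs_info->tree_root);
	kfree(fs_info->chunk_root);
	kfree(fs_info->dev_root);
	kfree(fs_info->csum_root);
	kfree(fs_info->super_copy);		/* assumed */
	kfree(fs_info->super_for_commit);	/* assumed */
	kfree(fs_info);
}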
@@ -2735,7 +3047,8 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) | |||
2735 | return ret; | 3047 | return ret; |
2736 | } | 3048 | } |
2737 | 3049 | ||
2738 | int btree_lock_page_hook(struct page *page) | 3050 | static int btree_lock_page_hook(struct page *page, void *data, |
3051 | void (*flush_fn)(void *)) | ||
2739 | { | 3052 | { |
2740 | struct inode *inode = page->mapping->host; | 3053 | struct inode *inode = page->mapping->host; |
2741 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3054 | struct btrfs_root *root = BTRFS_I(inode)->root; |
@@ -2752,7 +3065,10 @@ int btree_lock_page_hook(struct page *page) | |||
2752 | if (!eb) | 3065 | if (!eb) |
2753 | goto out; | 3066 | goto out; |
2754 | 3067 | ||
2755 | btrfs_tree_lock(eb); | 3068 | if (!btrfs_try_tree_write_lock(eb)) { |
3069 | flush_fn(data); | ||
3070 | btrfs_tree_lock(eb); | ||
3071 | } | ||
2756 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); | 3072 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); |
2757 | 3073 | ||
2758 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { | 3074 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { |
@@ -2767,7 +3083,10 @@ int btree_lock_page_hook(struct page *page) | |||
2767 | btrfs_tree_unlock(eb); | 3083 | btrfs_tree_unlock(eb); |
2768 | free_extent_buffer(eb); | 3084 | free_extent_buffer(eb); |
2769 | out: | 3085 | out: |
2770 | lock_page(page); | 3086 | if (!trylock_page(page)) { |
3087 | flush_fn(data); | ||
3088 | lock_page(page); | ||
3089 | } | ||
2771 | return 0; | 3090 | return 0; |
2772 | } | 3091 | } |
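Both hunks above apply the same idiom: take the lock opportunistically, and only pay for a flush when the lock is actually contended, so the hook never blocks while unsubmitted work sits in the queue. A small pthreads model of the idiom (illustrative only, not kernel code):

#include <pthread.h>
#include <stdio.h>

/* only pay the flush cost when the lock is contended */
static void lock_or_flush(pthread_mutex_t *lock,
			  void (*flush_fn)(void *), void *data)
{
	if (pthread_mutex_trylock(lock) != 0) {
		flush_fn(data);           /* push out queued work first */
		pthread_mutex_lock(lock); /* then block */
	}
}

static void flush(void *data)
{
	printf("flushing before blocking on %s\n", (const char *)data);
}

int main(void)
{
	pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;

	lock_or_flush(&m, flush, "eb");   /* uncontended: no flush */
	pthread_mutex_unlock(&m);
	return 0;
}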
2773 | 3092 | ||
@@ -3123,6 +3442,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
3123 | static struct extent_io_ops btree_extent_io_ops = { | 3442 | static struct extent_io_ops btree_extent_io_ops = { |
3124 | .write_cache_pages_lock_hook = btree_lock_page_hook, | 3443 | .write_cache_pages_lock_hook = btree_lock_page_hook, |
3125 | .readpage_end_io_hook = btree_readpage_end_io_hook, | 3444 | .readpage_end_io_hook = btree_readpage_end_io_hook, |
3445 | .readpage_io_failed_hook = btree_io_failed_hook, | ||
3126 | .submit_bio_hook = btree_submit_bio_hook, | 3446 | .submit_bio_hook = btree_submit_bio_hook, |
3127 | /* note we're sharing with inode.c for the merge bio hook */ | 3447 | /* note we're sharing with inode.c for the merge bio hook */ |
3128 | .merge_bio_hook = btrfs_merge_bio_hook, | 3448 | .merge_bio_hook = btrfs_merge_bio_hook, |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index bec3ea4bd67f..c99d0a8f13fa 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -40,6 +40,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, | |||
40 | u32 blocksize, u64 parent_transid); | 40 | u32 blocksize, u64 parent_transid); |
41 | int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, | 41 | int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, |
42 | u64 parent_transid); | 42 | u64 parent_transid); |
43 | int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, | ||
44 | int mirror_num, struct extent_buffer **eb); | ||
43 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, | 45 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, |
44 | u64 bytenr, u32 blocksize); | 46 | u64 bytenr, u32 blocksize); |
45 | int clean_tree_block(struct btrfs_trans_handle *trans, | 47 | int clean_tree_block(struct btrfs_trans_handle *trans, |
@@ -83,8 +85,6 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, | |||
83 | struct btrfs_fs_info *fs_info); | 85 | struct btrfs_fs_info *fs_info); |
84 | int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | 86 | int btrfs_add_log_tree(struct btrfs_trans_handle *trans, |
85 | struct btrfs_root *root); | 87 | struct btrfs_root *root); |
86 | int btree_lock_page_hook(struct page *page); | ||
87 | |||
88 | 88 | ||
89 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 89 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
90 | void btrfs_init_lockdep(void); | 90 | void btrfs_init_lockdep(void); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 119f842c1d4f..18ea90c8943b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/rcupdate.h> | 23 | #include <linux/rcupdate.h> |
24 | #include <linux/kthread.h> | 24 | #include <linux/kthread.h> |
25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
26 | #include <linux/ratelimit.h> | ||
26 | #include "compat.h" | 27 | #include "compat.h" |
27 | #include "hash.h" | 28 | #include "hash.h" |
28 | #include "ctree.h" | 29 | #include "ctree.h" |
@@ -52,6 +53,21 @@ enum { | |||
52 | CHUNK_ALLOC_LIMITED = 2, | 53 | CHUNK_ALLOC_LIMITED = 2, |
53 | }; | 54 | }; |
54 | 55 | ||
56 | /* | ||
57 | * Control how reservations are dealt with. | ||
58 | * | ||
59 | * RESERVE_FREE - freeing a reservation. | ||
60 | * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for | ||
61 | * ENOSPC accounting | ||
62 | * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update | ||
63 | * bytes_may_use as the ENOSPC accounting is done elsewhere | ||
64 | */ | ||
65 | enum { | ||
66 | RESERVE_FREE = 0, | ||
67 | RESERVE_ALLOC = 1, | ||
68 | RESERVE_ALLOC_NO_ACCOUNT = 2, | ||
69 | }; | ||
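A compact userspace model of how the three modes are meant to move the space_info counters (names simplified; the real work happens in btrfs_update_reserved_bytes(), forward-declared just below): RESERVE_ALLOC converts a bytes_may_use reservation into bytes_reserved, RESERVE_ALLOC_NO_ACCOUNT only takes the hard reservation, and RESERVE_FREE undoes one.

#include <stdio.h>

struct space_info_model {
	unsigned long long bytes_reserved;	/* hard reservations */
	unsigned long long bytes_may_use;	/* ENOSPC accounting */
};

static void update_reserved(struct space_info_model *s,
			    unsigned long long bytes, int mode)
{
	switch (mode) {
	case 0:	/* RESERVE_FREE */
		s->bytes_reserved -= bytes;
		break;
	case 1:	/* RESERVE_ALLOC: move may_use into reserved */
		s->bytes_may_use -= bytes;
		/* fall through */
	case 2:	/* RESERVE_ALLOC_NO_ACCOUNT */
		s->bytes_reserved += bytes;
		break;
	}
}

int main(void)
{
	struct space_info_model s = { 0, 4096 };

	update_reserved(&s, 4096, 1);
	printf("reserved=%llu may_use=%llu\n",
	       s.bytes_reserved, s.bytes_may_use);
	return 0;
}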
70 | |||
55 | static int update_block_group(struct btrfs_trans_handle *trans, | 71 | static int update_block_group(struct btrfs_trans_handle *trans, |
56 | struct btrfs_root *root, | 72 | struct btrfs_root *root, |
57 | u64 bytenr, u64 num_bytes, int alloc); | 73 | u64 bytenr, u64 num_bytes, int alloc); |
@@ -81,6 +97,8 @@ static int find_next_key(struct btrfs_path *path, int level, | |||
81 | struct btrfs_key *key); | 97 | struct btrfs_key *key); |
82 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | 98 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, |
83 | int dump_block_groups); | 99 | int dump_block_groups); |
100 | static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, | ||
101 | u64 num_bytes, int reserve); | ||
84 | 102 | ||
85 | static noinline int | 103 | static noinline int |
86 | block_group_cache_done(struct btrfs_block_group_cache *cache) | 104 | block_group_cache_done(struct btrfs_block_group_cache *cache) |
@@ -104,7 +122,6 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache) | |||
104 | if (atomic_dec_and_test(&cache->count)) { | 122 | if (atomic_dec_and_test(&cache->count)) { |
105 | WARN_ON(cache->pinned > 0); | 123 | WARN_ON(cache->pinned > 0); |
106 | WARN_ON(cache->reserved > 0); | 124 | WARN_ON(cache->reserved > 0); |
107 | WARN_ON(cache->reserved_pinned > 0); | ||
108 | kfree(cache->free_space_ctl); | 125 | kfree(cache->free_space_ctl); |
109 | kfree(cache); | 126 | kfree(cache); |
110 | } | 127 | } |
@@ -465,7 +482,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
465 | * we likely hold important locks. | 482 | * we likely hold important locks. |
466 | */ | 483 | */ |
467 | if (trans && (!trans->transaction->in_commit) && | 484 | if (trans && (!trans->transaction->in_commit) && |
468 | (root && root != root->fs_info->tree_root)) { | 485 | (root && root != root->fs_info->tree_root) && |
486 | btrfs_test_opt(root, SPACE_CACHE)) { | ||
469 | spin_lock(&cache->lock); | 487 | spin_lock(&cache->lock); |
470 | if (cache->cached != BTRFS_CACHE_NO) { | 488 | if (cache->cached != BTRFS_CACHE_NO) { |
471 | spin_unlock(&cache->lock); | 489 | spin_unlock(&cache->lock); |
@@ -2700,6 +2718,13 @@ again: | |||
2700 | goto again; | 2718 | goto again; |
2701 | } | 2719 | } |
2702 | 2720 | ||
2721 | /* We've already set up this transaction, go ahead and exit */ | ||
2722 | if (block_group->cache_generation == trans->transid && | ||
2723 | i_size_read(inode)) { | ||
2724 | dcs = BTRFS_DC_SETUP; | ||
2725 | goto out_put; | ||
2726 | } | ||
2727 | |||
2703 | /* | 2728 | /* |
2704 | * We want to set the generation to 0, that way if anything goes wrong | 2729 | * We want to set the generation to 0, that way if anything goes wrong |
2705 | * from here on out we know not to trust this cache when we load up next | 2730 | * from here on out we know not to trust this cache when we load up next |
@@ -2749,12 +2774,15 @@ again: | |||
2749 | if (!ret) | 2774 | if (!ret) |
2750 | dcs = BTRFS_DC_SETUP; | 2775 | dcs = BTRFS_DC_SETUP; |
2751 | btrfs_free_reserved_data_space(inode, num_pages); | 2776 | btrfs_free_reserved_data_space(inode, num_pages); |
2777 | |||
2752 | out_put: | 2778 | out_put: |
2753 | iput(inode); | 2779 | iput(inode); |
2754 | out_free: | 2780 | out_free: |
2755 | btrfs_release_path(path); | 2781 | btrfs_release_path(path); |
2756 | out: | 2782 | out: |
2757 | spin_lock(&block_group->lock); | 2783 | spin_lock(&block_group->lock); |
2784 | if (!ret) | ||
2785 | block_group->cache_generation = trans->transid; | ||
2758 | block_group->disk_cache_state = dcs; | 2786 | block_group->disk_cache_state = dcs; |
2759 | spin_unlock(&block_group->lock); | 2787 | spin_unlock(&block_group->lock); |
2760 | 2788 | ||
@@ -3122,16 +3150,13 @@ commit_trans: | |||
3122 | return -ENOSPC; | 3150 | return -ENOSPC; |
3123 | } | 3151 | } |
3124 | data_sinfo->bytes_may_use += bytes; | 3152 | data_sinfo->bytes_may_use += bytes; |
3125 | BTRFS_I(inode)->reserved_bytes += bytes; | ||
3126 | spin_unlock(&data_sinfo->lock); | 3153 | spin_unlock(&data_sinfo->lock); |
3127 | 3154 | ||
3128 | return 0; | 3155 | return 0; |
3129 | } | 3156 | } |
3130 | 3157 | ||
3131 | /* | 3158 | /* |
3132 | * called when we are clearing an delalloc extent from the | 3159 | * Called if we need to clear a data reservation for this inode. |
3133 | * inode's io_tree or there was an error for whatever reason | ||
3134 | * after calling btrfs_check_data_free_space | ||
3135 | */ | 3160 | */ |
3136 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) | 3161 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) |
3137 | { | 3162 | { |
@@ -3144,7 +3169,6 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) | |||
3144 | data_sinfo = BTRFS_I(inode)->space_info; | 3169 | data_sinfo = BTRFS_I(inode)->space_info; |
3145 | spin_lock(&data_sinfo->lock); | 3170 | spin_lock(&data_sinfo->lock); |
3146 | data_sinfo->bytes_may_use -= bytes; | 3171 | data_sinfo->bytes_may_use -= bytes; |
3147 | BTRFS_I(inode)->reserved_bytes -= bytes; | ||
3148 | spin_unlock(&data_sinfo->lock); | 3172 | spin_unlock(&data_sinfo->lock); |
3149 | } | 3173 | } |
3150 | 3174 | ||
@@ -3165,6 +3189,7 @@ static int should_alloc_chunk(struct btrfs_root *root, | |||
3165 | struct btrfs_space_info *sinfo, u64 alloc_bytes, | 3189 | struct btrfs_space_info *sinfo, u64 alloc_bytes, |
3166 | int force) | 3190 | int force) |
3167 | { | 3191 | { |
3192 | struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; | ||
3168 | u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; | 3193 | u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; |
3169 | u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved; | 3194 | u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved; |
3170 | u64 thresh; | 3195 | u64 thresh; |
@@ -3173,11 +3198,18 @@ static int should_alloc_chunk(struct btrfs_root *root, | |||
3173 | return 1; | 3198 | return 1; |
3174 | 3199 | ||
3175 | /* | 3200 | /* |
3201 | * We need to take into account the global rsv because for all intents | ||
3202 | * and purposes it's used space. Don't worry about locking the | ||
3203 | * global_rsv, it doesn't change except when the transaction commits. | ||
3204 | */ | ||
3205 | num_allocated += global_rsv->size; | ||
3206 | |||
3207 | /* | ||
3176 | * in limited mode, we want to have some free space up to | 3208 | * in limited mode, we want to have some free space up to |
3177 | * about 1% of the FS size. | 3209 | * about 1% of the FS size. |
3178 | */ | 3210 | */ |
3179 | if (force == CHUNK_ALLOC_LIMITED) { | 3211 | if (force == CHUNK_ALLOC_LIMITED) { |
3180 | thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); | 3212 | thresh = btrfs_super_total_bytes(root->fs_info->super_copy); |
3181 | thresh = max_t(u64, 64 * 1024 * 1024, | 3213 | thresh = max_t(u64, 64 * 1024 * 1024, |
3182 | div_factor_fine(thresh, 1)); | 3214 | div_factor_fine(thresh, 1)); |
3183 | 3215 | ||
@@ -3199,7 +3231,7 @@ static int should_alloc_chunk(struct btrfs_root *root, | |||
3199 | if (num_allocated + alloc_bytes < div_factor(num_bytes, 8)) | 3231 | if (num_allocated + alloc_bytes < div_factor(num_bytes, 8)) |
3200 | return 0; | 3232 | return 0; |
3201 | 3233 | ||
3202 | thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); | 3234 | thresh = btrfs_super_total_bytes(root->fs_info->super_copy); |
3203 | 3235 | ||
3204 | /* 256MB or 5% of the FS */ | 3236 | /* 256MB or 5% of the FS */ |
3205 | thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); | 3237 | thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); |
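Worked example of the two thresholds (userspace arithmetic only; in btrfs, div_factor_fine(x, n) is x * n / 100): on a 100 GiB filesystem, limited mode wants roughly 1 GiB of headroom while the normal path uses about 5 GiB, with 64 MiB and 256 MiB as the respective floors for tiny filesystems.

#include <stdint.h>
#include <stdio.h>

static uint64_t div_factor_fine(uint64_t num, int factor)
{
	return num * factor / 100;
}

static uint64_t max_u64(uint64_t a, uint64_t b)
{
	return a > b ? a : b;
}

int main(void)
{
	uint64_t fs_bytes = 100ULL << 30;	/* 100 GiB filesystem */
	uint64_t limited = max_u64(64ULL << 20,
				   div_factor_fine(fs_bytes, 1));
	uint64_t normal = max_u64(256ULL << 20,
				  div_factor_fine(fs_bytes, 5));

	printf("limited-mode thresh: %llu MiB\n",
	       (unsigned long long)(limited >> 20));
	printf("normal thresh:       %llu MiB\n",
	       (unsigned long long)(normal >> 20));
	return 0;
}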
@@ -3302,24 +3334,26 @@ out: | |||
3302 | /* | 3334 | /* |
3303 | * shrink metadata reservation for delalloc | 3335 | * shrink metadata reservation for delalloc |
3304 | */ | 3336 | */ |
3305 | static int shrink_delalloc(struct btrfs_trans_handle *trans, | 3337 | static int shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, |
3306 | struct btrfs_root *root, u64 to_reclaim, int sync) | 3338 | bool wait_ordered) |
3307 | { | 3339 | { |
3308 | struct btrfs_block_rsv *block_rsv; | 3340 | struct btrfs_block_rsv *block_rsv; |
3309 | struct btrfs_space_info *space_info; | 3341 | struct btrfs_space_info *space_info; |
3342 | struct btrfs_trans_handle *trans; | ||
3310 | u64 reserved; | 3343 | u64 reserved; |
3311 | u64 max_reclaim; | 3344 | u64 max_reclaim; |
3312 | u64 reclaimed = 0; | 3345 | u64 reclaimed = 0; |
3313 | long time_left; | 3346 | long time_left; |
3314 | int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; | 3347 | unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; |
3315 | int loops = 0; | 3348 | int loops = 0; |
3316 | unsigned long progress; | 3349 | unsigned long progress; |
3317 | 3350 | ||
3351 | trans = (struct btrfs_trans_handle *)current->journal_info; | ||
3318 | block_rsv = &root->fs_info->delalloc_block_rsv; | 3352 | block_rsv = &root->fs_info->delalloc_block_rsv; |
3319 | space_info = block_rsv->space_info; | 3353 | space_info = block_rsv->space_info; |
3320 | 3354 | ||
3321 | smp_mb(); | 3355 | smp_mb(); |
3322 | reserved = space_info->bytes_reserved; | 3356 | reserved = space_info->bytes_may_use; |
3323 | progress = space_info->reservation_progress; | 3357 | progress = space_info->reservation_progress; |
3324 | 3358 | ||
3325 | if (reserved == 0) | 3359 | if (reserved == 0) |
@@ -3334,7 +3368,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, | |||
3334 | } | 3368 | } |
3335 | 3369 | ||
3336 | max_reclaim = min(reserved, to_reclaim); | 3370 | max_reclaim = min(reserved, to_reclaim); |
3337 | 3371 | nr_pages = max_t(unsigned long, nr_pages, | |
3372 | max_reclaim >> PAGE_CACHE_SHIFT); | ||
3338 | while (loops < 1024) { | 3373 | while (loops < 1024) { |
3339 | /* have the flusher threads jump in and do some IO */ | 3374 | /* have the flusher threads jump in and do some IO */ |
3340 | smp_mb(); | 3375 | smp_mb(); |
@@ -3343,9 +3378,9 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, | |||
3343 | writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); | 3378 | writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); |
3344 | 3379 | ||
3345 | spin_lock(&space_info->lock); | 3380 | spin_lock(&space_info->lock); |
3346 | if (reserved > space_info->bytes_reserved) | 3381 | if (reserved > space_info->bytes_may_use) |
3347 | reclaimed += reserved - space_info->bytes_reserved; | 3382 | reclaimed += reserved - space_info->bytes_may_use; |
3348 | reserved = space_info->bytes_reserved; | 3383 | reserved = space_info->bytes_may_use; |
3349 | spin_unlock(&space_info->lock); | 3384 | spin_unlock(&space_info->lock); |
3350 | 3385 | ||
3351 | loops++; | 3386 | loops++; |
@@ -3356,11 +3391,15 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, | |||
3356 | if (trans && trans->transaction->blocked) | 3391 | if (trans && trans->transaction->blocked) |
3357 | return -EAGAIN; | 3392 | return -EAGAIN; |
3358 | 3393 | ||
3359 | time_left = schedule_timeout_interruptible(1); | 3394 | if (wait_ordered && !trans) { |
3395 | btrfs_wait_ordered_extents(root, 0, 0); | ||
3396 | } else { | ||
3397 | time_left = schedule_timeout_interruptible(1); | ||
3360 | 3398 | ||
3361 | /* We were interrupted, exit */ | 3399 | /* We were interrupted, exit */ |
3362 | if (time_left) | 3400 | if (time_left) |
3363 | break; | 3401 | break; |
3402 | } | ||
3364 | 3403 | ||
3365 | /* we've kicked the IO a few times, if anything has been freed, | 3404 | /* we've kicked the IO a few times, if anything has been freed, |
3366 | * exit. There is no sense in looping here for a long time | 3405 | * exit. There is no sense in looping here for a long time |
@@ -3375,34 +3414,90 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, | |||
3375 | } | 3414 | } |
3376 | 3415 | ||
3377 | } | 3416 | } |
3378 | if (reclaimed >= to_reclaim && !trans) | 3417 | |
3379 | btrfs_wait_ordered_extents(root, 0, 0); | ||
3380 | return reclaimed >= to_reclaim; | 3418 | return reclaimed >= to_reclaim; |
3381 | } | 3419 | } |
3382 | 3420 | ||
3383 | /* | 3421 | /** |
3384 | * Retries tells us how many times we've called reserve_metadata_bytes. The | 3422 | * may_commit_transaction - possibly commit the transaction if it's OK to
3385 | * idea is if this is the first call (retries == 0) then we will add to our | 3423 | * @root - the root we're allocating for |
3386 | * reserved count if we can't make the allocation in order to hold our place | 3424 | * @bytes - the number of bytes we want to reserve |
3387 | * while we go and try and free up space. That way for retries > 1 we don't try | 3425 | * @force - force the commit |
3388 | * and add space, we just check to see if the amount of unused space is >= the | ||
3389 | * total space, meaning that our reservation is valid. | ||
3390 | * | 3426 | * |
3391 | * However if we don't intend to retry this reservation, pass -1 as retries so | 3427 | * This will check to make sure that committing the transaction will actually |
3392 | * that it short circuits this logic. | 3428 | * get us somewhere and then commit the transaction if it does. Otherwise it |
3429 | * will return -ENOSPC. | ||
3393 | */ | 3430 | */ |
3394 | static int reserve_metadata_bytes(struct btrfs_trans_handle *trans, | 3431 | static int may_commit_transaction(struct btrfs_root *root, |
3395 | struct btrfs_root *root, | 3432 | struct btrfs_space_info *space_info, |
3433 | u64 bytes, int force) | ||
3434 | { | ||
3435 | struct btrfs_block_rsv *delayed_rsv = &root->fs_info->delayed_block_rsv; | ||
3436 | struct btrfs_trans_handle *trans; | ||
3437 | |||
3438 | trans = (struct btrfs_trans_handle *)current->journal_info; | ||
3439 | if (trans) | ||
3440 | return -EAGAIN; | ||
3441 | |||
3442 | if (force) | ||
3443 | goto commit; | ||
3444 | |||
3445 | /* See if there is enough pinned space to make this reservation */ | ||
3446 | spin_lock(&space_info->lock); | ||
3447 | if (space_info->bytes_pinned >= bytes) { | ||
3448 | spin_unlock(&space_info->lock); | ||
3449 | goto commit; | ||
3450 | } | ||
3451 | spin_unlock(&space_info->lock); | ||
3452 | |||
3453 | /* | ||
3454 | * See if there is some space in the delayed insertion reservation for | ||
3455 | * this reservation. | ||
3456 | */ | ||
3457 | if (space_info != delayed_rsv->space_info) | ||
3458 | return -ENOSPC; | ||
3459 | |||
3460 | spin_lock(&delayed_rsv->lock); | ||
3461 | if (delayed_rsv->size < bytes) { | ||
3462 | spin_unlock(&delayed_rsv->lock); | ||
3463 | return -ENOSPC; | ||
3464 | } | ||
3465 | spin_unlock(&delayed_rsv->lock); | ||
3466 | |||
3467 | commit: | ||
3468 | trans = btrfs_join_transaction(root); | ||
3469 | if (IS_ERR(trans)) | ||
3470 | return -ENOSPC; | ||
3471 | |||
3472 | return btrfs_commit_transaction(trans, root); | ||
3473 | } | ||
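The gatekeeping boils down to three questions, modeled below in plain C with simplified names: are we already inside a transaction (then we must not commit under ourselves), will the commit unpin enough space, or will flushing the delayed-insertion reservation at commit time cover the request?

#include <stdbool.h>
#include <stdio.h>

static int may_commit_model(bool in_transaction, bool force,
			    unsigned long long bytes_pinned,
			    bool same_space_info,
			    unsigned long long delayed_rsv_size,
			    unsigned long long wanted)
{
	if (in_transaction)
		return -11;	/* -EAGAIN: caller holds a transaction */
	if (force || bytes_pinned >= wanted)
		return 0;	/* a commit will free pinned space */
	if (same_space_info && delayed_rsv_size >= wanted)
		return 0;	/* delayed items flush at commit time */
	return -28;		/* -ENOSPC: committing won't help */
}

int main(void)
{
	printf("%d\n", may_commit_model(false, false, 1 << 20,
					false, 0, 4 << 20));
	return 0;
}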
3474 | |||
3475 | /** | ||
3476 | * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space | ||
3477 | * @root - the root we're allocating for | ||
3478 | * @block_rsv - the block_rsv we're allocating for | ||
3479 | * @orig_bytes - the number of bytes we want | ||
3480 | * @flush - whether or not we can flush to make our reservation | ||
3481 | * | ||
3482 | * This will reserve orig_bytes number of bytes from the space info associated | ||
3483 | * with the block_rsv. If there is not enough space it will make an attempt to | ||
3484 | * flush out space to make room. It will do this by flushing delalloc if | ||
3485 | * possible or committing the transaction. If flush is 0 then no attempts to | ||
3486 | * regain reservations will be made and this will fail if there is not enough | ||
3487 | * space already. | ||
3488 | */ | ||
3489 | static int reserve_metadata_bytes(struct btrfs_root *root, | ||
3396 | struct btrfs_block_rsv *block_rsv, | 3490 | struct btrfs_block_rsv *block_rsv, |
3397 | u64 orig_bytes, int flush) | 3491 | u64 orig_bytes, int flush) |
3398 | { | 3492 | { |
3399 | struct btrfs_space_info *space_info = block_rsv->space_info; | 3493 | struct btrfs_space_info *space_info = block_rsv->space_info; |
3400 | u64 unused; | 3494 | u64 used; |
3401 | u64 num_bytes = orig_bytes; | 3495 | u64 num_bytes = orig_bytes; |
3402 | int retries = 0; | 3496 | int retries = 0; |
3403 | int ret = 0; | 3497 | int ret = 0; |
3404 | bool committed = false; | 3498 | bool committed = false; |
3405 | bool flushing = false; | 3499 | bool flushing = false; |
3500 | bool wait_ordered = false; | ||
3406 | 3501 | ||
3407 | again: | 3502 | again: |
3408 | ret = 0; | 3503 | ret = 0; |
@@ -3419,7 +3514,7 @@ again: | |||
3419 | * deadlock since we are waiting for the flusher to finish, but | 3514 | * deadlock since we are waiting for the flusher to finish, but |
3420 | * hold the current transaction open. | 3515 | * hold the current transaction open. |
3421 | */ | 3516 | */ |
3422 | if (trans) | 3517 | if (current->journal_info) |
3423 | return -EAGAIN; | 3518 | return -EAGAIN; |
3424 | ret = wait_event_interruptible(space_info->wait, | 3519 | ret = wait_event_interruptible(space_info->wait, |
3425 | !space_info->flush); | 3520 | !space_info->flush); |
@@ -3431,9 +3526,9 @@ again: | |||
3431 | } | 3526 | } |
3432 | 3527 | ||
3433 | ret = -ENOSPC; | 3528 | ret = -ENOSPC; |
3434 | unused = space_info->bytes_used + space_info->bytes_reserved + | 3529 | used = space_info->bytes_used + space_info->bytes_reserved + |
3435 | space_info->bytes_pinned + space_info->bytes_readonly + | 3530 | space_info->bytes_pinned + space_info->bytes_readonly + |
3436 | space_info->bytes_may_use; | 3531 | space_info->bytes_may_use; |
3437 | 3532 | ||
3438 | /* | 3533 | /* |
3439 | * The idea here is that we've not already over-reserved the block group | 3534 | * The idea here is that we've not already over-reserved the block group |
@@ -3442,10 +3537,9 @@ again: | |||
3442 | * lets start flushing stuff first and then come back and try to make | 3537 | * lets start flushing stuff first and then come back and try to make |
3443 | * our reservation. | 3538 | * our reservation. |
3444 | */ | 3539 | */ |
3445 | if (unused <= space_info->total_bytes) { | 3540 | if (used <= space_info->total_bytes) { |
3446 | unused = space_info->total_bytes - unused; | 3541 | if (used + orig_bytes <= space_info->total_bytes) { |
3447 | if (unused >= num_bytes) { | 3542 | space_info->bytes_may_use += orig_bytes; |
3448 | space_info->bytes_reserved += orig_bytes; | ||
3449 | ret = 0; | 3543 | ret = 0; |
3450 | } else { | 3544 | } else { |
3451 | /* | 3545 | /* |
@@ -3461,10 +3555,64 @@ again: | |||
3461 | * amount plus the amount of bytes that we need for this | 3555 | * amount plus the amount of bytes that we need for this |
3462 | * reservation. | 3556 | * reservation. |
3463 | */ | 3557 | */ |
3464 | num_bytes = unused - space_info->total_bytes + | 3558 | wait_ordered = true; |
3559 | num_bytes = used - space_info->total_bytes + | ||
3465 | (orig_bytes * (retries + 1)); | 3560 | (orig_bytes * (retries + 1)); |
3466 | } | 3561 | } |
3467 | 3562 | ||
3563 | if (ret) { | ||
3564 | u64 profile = btrfs_get_alloc_profile(root, 0); | ||
3565 | u64 avail; | ||
3566 | |||
3567 | /* | ||
3568 | * If we have a lot of space that's pinned, don't bother doing | ||
3569 | * the overcommit dance yet and just commit the transaction. | ||
3570 | */ | ||
3571 | avail = (space_info->total_bytes - space_info->bytes_used) * 8; | ||
3572 | do_div(avail, 10); | ||
3573 | if (space_info->bytes_pinned >= avail && flush && !committed) { | ||
3574 | space_info->flush = 1; | ||
3575 | flushing = true; | ||
3576 | spin_unlock(&space_info->lock); | ||
3577 | ret = may_commit_transaction(root, space_info, | ||
3578 | orig_bytes, 1); | ||
3579 | if (ret) | ||
3580 | goto out; | ||
3581 | committed = true; | ||
3582 | goto again; | ||
3583 | } | ||
3584 | |||
3585 | spin_lock(&root->fs_info->free_chunk_lock); | ||
3586 | avail = root->fs_info->free_chunk_space; | ||
3587 | |||
3588 | /* | ||
3589 | * If we have dup, raid1 or raid10 then only half of the free | ||
3590 | * space is actually usable. | ||
3591 | */ | ||
3592 | if (profile & (BTRFS_BLOCK_GROUP_DUP | | ||
3593 | BTRFS_BLOCK_GROUP_RAID1 | | ||
3594 | BTRFS_BLOCK_GROUP_RAID10)) | ||
3595 | avail >>= 1; | ||
3596 | |||
3597 | /* | ||
3598 | * If we can flush, be conservative and only let ourselves | ||
3599 | * overcommit by 1/8th of the space. If we can't flush, allow | ||
3600 | * overcommitting up to 1/2 of the space. | ||
3601 | */ | ||
3602 | if (flush) | ||
3603 | avail >>= 3; | ||
3604 | else | ||
3605 | avail >>= 1; | ||
3606 | spin_unlock(&root->fs_info->free_chunk_lock); | ||
3607 | |||
3608 | if (used + num_bytes < space_info->total_bytes + avail) { | ||
3609 | space_info->bytes_may_use += orig_bytes; | ||
3610 | ret = 0; | ||
3611 | } else { | ||
3612 | wait_ordered = true; | ||
3613 | } | ||
3614 | } | ||
3615 | |||
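Worked example of the headroom computation (userspace arithmetic; the sizes are made up): 8 GiB of unallocated chunk space on a raid1 profile halves to 4 GiB of usable space, and with flushing allowed the reservation may overcommit by 1/8th of that, 512 MiB.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t free_chunk_space = 8ULL << 30;	/* 8 GiB unallocated */
	int mirrored = 1;	/* dup/raid1/raid10 profile */
	int flush = 1;
	uint64_t avail = free_chunk_space;

	if (mirrored)
		avail >>= 1;	/* only half is usable */
	if (flush)
		avail >>= 3;	/* conservative: 1/8th headroom */
	else
		avail >>= 1;	/* no flushing: up to 1/2 */

	printf("overcommit headroom: %llu MiB\n",
	       (unsigned long long)(avail >> 20));
	return 0;
}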
3468 | /* | 3616 | /* |
3469 | * Couldn't make our reservation, save our place so while we're trying | 3617 | * Couldn't make our reservation, save our place so while we're trying |
3470 | * to reclaim space we can actually use it instead of somebody else | 3618 | * to reclaim space we can actually use it instead of somebody else |
@@ -3484,7 +3632,7 @@ again: | |||
3484 | * We do synchronous shrinking since we don't actually unreserve | 3632 | * We do synchronous shrinking since we don't actually unreserve |
3485 | * metadata until after the IO is completed. | 3633 | * metadata until after the IO is completed. |
3486 | */ | 3634 | */ |
3487 | ret = shrink_delalloc(trans, root, num_bytes, 1); | 3635 | ret = shrink_delalloc(root, num_bytes, wait_ordered); |
3488 | if (ret < 0) | 3636 | if (ret < 0) |
3489 | goto out; | 3637 | goto out; |
3490 | 3638 | ||
@@ -3496,35 +3644,17 @@ again: | |||
3496 | * so go back around and try again. | 3644 | * so go back around and try again. |
3497 | */ | 3645 | */ |
3498 | if (retries < 2) { | 3646 | if (retries < 2) { |
3647 | wait_ordered = true; | ||
3499 | retries++; | 3648 | retries++; |
3500 | goto again; | 3649 | goto again; |
3501 | } | 3650 | } |
3502 | 3651 | ||
3503 | /* | ||
3504 | * Not enough space to be reclaimed, don't bother committing the | ||
3505 | * transaction. | ||
3506 | */ | ||
3507 | spin_lock(&space_info->lock); | ||
3508 | if (space_info->bytes_pinned < orig_bytes) | ||
3509 | ret = -ENOSPC; | ||
3510 | spin_unlock(&space_info->lock); | ||
3511 | if (ret) | ||
3512 | goto out; | ||
3513 | |||
3514 | ret = -EAGAIN; | ||
3515 | if (trans) | ||
3516 | goto out; | ||
3517 | |||
3518 | ret = -ENOSPC; | 3652 | ret = -ENOSPC; |
3519 | if (committed) | 3653 | if (committed) |
3520 | goto out; | 3654 | goto out; |
3521 | 3655 | ||
3522 | trans = btrfs_join_transaction(root); | 3656 | ret = may_commit_transaction(root, space_info, orig_bytes, 0); |
3523 | if (IS_ERR(trans)) | ||
3524 | goto out; | ||
3525 | ret = btrfs_commit_transaction(trans, root); | ||
3526 | if (!ret) { | 3657 | if (!ret) { |
3527 | trans = NULL; | ||
3528 | committed = true; | 3658 | committed = true; |
3529 | goto again; | 3659 | goto again; |
3530 | } | 3660 | } |
@@ -3542,10 +3672,12 @@ out: | |||
3542 | static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans, | 3672 | static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans, |
3543 | struct btrfs_root *root) | 3673 | struct btrfs_root *root) |
3544 | { | 3674 | { |
3545 | struct btrfs_block_rsv *block_rsv; | 3675 | struct btrfs_block_rsv *block_rsv = NULL; |
3546 | if (root->ref_cows) | 3676 | |
3677 | if (root->ref_cows || root == root->fs_info->csum_root) | ||
3547 | block_rsv = trans->block_rsv; | 3678 | block_rsv = trans->block_rsv; |
3548 | else | 3679 | |
3680 | if (!block_rsv) | ||
3549 | block_rsv = root->block_rsv; | 3681 | block_rsv = root->block_rsv; |
3550 | 3682 | ||
3551 | if (!block_rsv) | 3683 | if (!block_rsv) |
@@ -3616,7 +3748,7 @@ static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, | |||
3616 | } | 3748 | } |
3617 | if (num_bytes) { | 3749 | if (num_bytes) { |
3618 | spin_lock(&space_info->lock); | 3750 | spin_lock(&space_info->lock); |
3619 | space_info->bytes_reserved -= num_bytes; | 3751 | space_info->bytes_may_use -= num_bytes; |
3620 | space_info->reservation_progress++; | 3752 | space_info->reservation_progress++; |
3621 | spin_unlock(&space_info->lock); | 3753 | spin_unlock(&space_info->lock); |
3622 | } | 3754 | } |
@@ -3640,9 +3772,6 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv) | |||
3640 | { | 3772 | { |
3641 | memset(rsv, 0, sizeof(*rsv)); | 3773 | memset(rsv, 0, sizeof(*rsv)); |
3642 | spin_lock_init(&rsv->lock); | 3774 | spin_lock_init(&rsv->lock); |
3643 | atomic_set(&rsv->usage, 1); | ||
3644 | rsv->priority = 6; | ||
3645 | INIT_LIST_HEAD(&rsv->list); | ||
3646 | } | 3775 | } |
3647 | 3776 | ||
3648 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) | 3777 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) |
@@ -3663,38 +3792,38 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) | |||
3663 | void btrfs_free_block_rsv(struct btrfs_root *root, | 3792 | void btrfs_free_block_rsv(struct btrfs_root *root, |
3664 | struct btrfs_block_rsv *rsv) | 3793 | struct btrfs_block_rsv *rsv) |
3665 | { | 3794 | { |
3666 | if (rsv && atomic_dec_and_test(&rsv->usage)) { | 3795 | btrfs_block_rsv_release(root, rsv, (u64)-1); |
3667 | btrfs_block_rsv_release(root, rsv, (u64)-1); | 3796 | kfree(rsv); |
3668 | if (!rsv->durable) | ||
3669 | kfree(rsv); | ||
3670 | } | ||
3671 | } | 3797 | } |
3672 | 3798 | ||
3673 | /* | 3799 | int btrfs_block_rsv_add(struct btrfs_root *root, |
3674 | * make the block_rsv struct be able to capture freed space. | 3800 | struct btrfs_block_rsv *block_rsv, |
3675 | * the captured space will re-add to the the block_rsv struct | 3801 | u64 num_bytes) |
3676 | * after transaction commit | ||
3677 | */ | ||
3678 | void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, | ||
3679 | struct btrfs_block_rsv *block_rsv) | ||
3680 | { | 3802 | { |
3681 | block_rsv->durable = 1; | 3803 | int ret; |
3682 | mutex_lock(&fs_info->durable_block_rsv_mutex); | 3804 | |
3683 | list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list); | 3805 | if (num_bytes == 0) |
3684 | mutex_unlock(&fs_info->durable_block_rsv_mutex); | 3806 | return 0; |
3807 | |||
3808 | ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1); | ||
3809 | if (!ret) { | ||
3810 | block_rsv_add_bytes(block_rsv, num_bytes, 1); | ||
3811 | return 0; | ||
3812 | } | ||
3813 | |||
3814 | return ret; | ||
3685 | } | 3815 | } |
3686 | 3816 | ||
3687 | int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, | 3817 | int btrfs_block_rsv_add_noflush(struct btrfs_root *root, |
3688 | struct btrfs_root *root, | 3818 | struct btrfs_block_rsv *block_rsv, |
3689 | struct btrfs_block_rsv *block_rsv, | 3819 | u64 num_bytes) |
3690 | u64 num_bytes) | ||
3691 | { | 3820 | { |
3692 | int ret; | 3821 | int ret; |
3693 | 3822 | ||
3694 | if (num_bytes == 0) | 3823 | if (num_bytes == 0) |
3695 | return 0; | 3824 | return 0; |
3696 | 3825 | ||
3697 | ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1); | 3826 | ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 0); |
3698 | if (!ret) { | 3827 | if (!ret) { |
3699 | block_rsv_add_bytes(block_rsv, num_bytes, 1); | 3828 | block_rsv_add_bytes(block_rsv, num_bytes, 1); |
3700 | return 0; | 3829 | return 0; |
@@ -3703,55 +3832,52 @@ int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, | |||
3703 | return ret; | 3832 | return ret; |
3704 | } | 3833 | } |
3705 | 3834 | ||
3706 | int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | 3835 | int btrfs_block_rsv_check(struct btrfs_root *root, |
3707 | struct btrfs_root *root, | 3836 | struct btrfs_block_rsv *block_rsv, int min_factor) |
3708 | struct btrfs_block_rsv *block_rsv, | ||
3709 | u64 min_reserved, int min_factor) | ||
3710 | { | 3837 | { |
3711 | u64 num_bytes = 0; | 3838 | u64 num_bytes = 0; |
3712 | int commit_trans = 0; | ||
3713 | int ret = -ENOSPC; | 3839 | int ret = -ENOSPC; |
3714 | 3840 | ||
3715 | if (!block_rsv) | 3841 | if (!block_rsv) |
3716 | return 0; | 3842 | return 0; |
3717 | 3843 | ||
3718 | spin_lock(&block_rsv->lock); | 3844 | spin_lock(&block_rsv->lock); |
3719 | if (min_factor > 0) | 3845 | num_bytes = div_factor(block_rsv->size, min_factor); |
3720 | num_bytes = div_factor(block_rsv->size, min_factor); | 3846 | if (block_rsv->reserved >= num_bytes) |
3721 | if (min_reserved > num_bytes) | 3847 | ret = 0; |
3722 | num_bytes = min_reserved; | 3848 | spin_unlock(&block_rsv->lock); |
3723 | 3849 | ||
3724 | if (block_rsv->reserved >= num_bytes) { | 3850 | return ret; |
3851 | } | ||
3852 | |||
3853 | int btrfs_block_rsv_refill(struct btrfs_root *root, | ||
3854 | struct btrfs_block_rsv *block_rsv, | ||
3855 | u64 min_reserved) | ||
3856 | { | ||
3857 | u64 num_bytes = 0; | ||
3858 | int ret = -ENOSPC; | ||
3859 | |||
3860 | if (!block_rsv) | ||
3861 | return 0; | ||
3862 | |||
3863 | spin_lock(&block_rsv->lock); | ||
3864 | num_bytes = min_reserved; | ||
3865 | if (block_rsv->reserved >= num_bytes) | ||
3725 | ret = 0; | 3866 | ret = 0; |
3726 | } else { | 3867 | else |
3727 | num_bytes -= block_rsv->reserved; | 3868 | num_bytes -= block_rsv->reserved; |
3728 | if (block_rsv->durable && | ||
3729 | block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes) | ||
3730 | commit_trans = 1; | ||
3731 | } | ||
3732 | spin_unlock(&block_rsv->lock); | 3869 | spin_unlock(&block_rsv->lock); |
3870 | |||
3733 | if (!ret) | 3871 | if (!ret) |
3734 | return 0; | 3872 | return 0; |
3735 | 3873 | ||
3736 | if (block_rsv->refill_used) { | 3874 | ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1); |
3737 | ret = reserve_metadata_bytes(trans, root, block_rsv, | 3875 | if (!ret) { |
3738 | num_bytes, 0); | 3876 | block_rsv_add_bytes(block_rsv, num_bytes, 0); |
3739 | if (!ret) { | ||
3740 | block_rsv_add_bytes(block_rsv, num_bytes, 0); | ||
3741 | return 0; | ||
3742 | } | ||
3743 | } | ||
3744 | |||
3745 | if (commit_trans) { | ||
3746 | if (trans) | ||
3747 | return -EAGAIN; | ||
3748 | trans = btrfs_join_transaction(root); | ||
3749 | BUG_ON(IS_ERR(trans)); | ||
3750 | ret = btrfs_commit_transaction(trans, root); | ||
3751 | return 0; | 3877 | return 0; |
3752 | } | 3878 | } |
3753 | 3879 | ||
3754 | return -ENOSPC; | 3880 | return ret; |
3755 | } | 3881 | } |
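The split yields two distinct primitives, sketched below in userspace C (reserve_shortfall() is a stand-in for reserve_metadata_bytes(); div_factor(x, n) in btrfs is x * n / 10): check is a cheap read-only test against a fraction of the rsv's target size, while refill actually reserves whatever is missing up to an absolute floor.

#include <stdbool.h>
#include <stdio.h>

struct rsv_model {
	unsigned long long size;	/* target size */
	unsigned long long reserved;	/* currently backed */
};

static bool rsv_check(const struct rsv_model *r, int min_factor)
{
	/* e.g. min_factor = 5: is at least half the target reserved? */
	return r->reserved >= r->size * min_factor / 10;
}

static int rsv_refill(struct rsv_model *r, unsigned long long min_reserved,
		      bool (*reserve_shortfall)(unsigned long long))
{
	if (r->reserved >= min_reserved)
		return 0;
	if (!reserve_shortfall(min_reserved - r->reserved))
		return -28;	/* -ENOSPC */
	r->reserved = min_reserved;
	return 0;
}

static bool always(unsigned long long bytes)
{
	(void)bytes;
	return true;
}

int main(void)
{
	struct rsv_model r = { .size = 100, .reserved = 40 };

	printf("check(5): %d\n", rsv_check(&r, 5));
	printf("refill(60): %d, reserved=%llu\n",
	       rsv_refill(&r, 60, always), r.reserved);
	return 0;
}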
3756 | 3882 | ||
3757 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | 3883 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, |
@@ -3783,7 +3909,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) | |||
3783 | u64 num_bytes; | 3909 | u64 num_bytes; |
3784 | u64 meta_used; | 3910 | u64 meta_used; |
3785 | u64 data_used; | 3911 | u64 data_used; |
3786 | int csum_size = btrfs_super_csum_size(&fs_info->super_copy); | 3912 | int csum_size = btrfs_super_csum_size(fs_info->super_copy); |
3787 | 3913 | ||
3788 | sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); | 3914 | sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); |
3789 | spin_lock(&sinfo->lock); | 3915 | spin_lock(&sinfo->lock); |
@@ -3827,12 +3953,12 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
3827 | if (sinfo->total_bytes > num_bytes) { | 3953 | if (sinfo->total_bytes > num_bytes) { |
3828 | num_bytes = sinfo->total_bytes - num_bytes; | 3954 | num_bytes = sinfo->total_bytes - num_bytes; |
3829 | block_rsv->reserved += num_bytes; | 3955 | block_rsv->reserved += num_bytes; |
3830 | sinfo->bytes_reserved += num_bytes; | 3956 | sinfo->bytes_may_use += num_bytes; |
3831 | } | 3957 | } |
3832 | 3958 | ||
3833 | if (block_rsv->reserved >= block_rsv->size) { | 3959 | if (block_rsv->reserved >= block_rsv->size) { |
3834 | num_bytes = block_rsv->reserved - block_rsv->size; | 3960 | num_bytes = block_rsv->reserved - block_rsv->size; |
3835 | sinfo->bytes_reserved -= num_bytes; | 3961 | sinfo->bytes_may_use -= num_bytes; |
3836 | sinfo->reservation_progress++; | 3962 | sinfo->reservation_progress++; |
3837 | block_rsv->reserved = block_rsv->size; | 3963 | block_rsv->reserved = block_rsv->size; |
3838 | block_rsv->full = 1; | 3964 | block_rsv->full = 1; |
@@ -3848,16 +3974,13 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
3848 | 3974 | ||
3849 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); | 3975 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); |
3850 | fs_info->chunk_block_rsv.space_info = space_info; | 3976 | fs_info->chunk_block_rsv.space_info = space_info; |
3851 | fs_info->chunk_block_rsv.priority = 10; | ||
3852 | 3977 | ||
3853 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); | 3978 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); |
3854 | fs_info->global_block_rsv.space_info = space_info; | 3979 | fs_info->global_block_rsv.space_info = space_info; |
3855 | fs_info->global_block_rsv.priority = 10; | ||
3856 | fs_info->global_block_rsv.refill_used = 1; | ||
3857 | fs_info->delalloc_block_rsv.space_info = space_info; | 3980 | fs_info->delalloc_block_rsv.space_info = space_info; |
3858 | fs_info->trans_block_rsv.space_info = space_info; | 3981 | fs_info->trans_block_rsv.space_info = space_info; |
3859 | fs_info->empty_block_rsv.space_info = space_info; | 3982 | fs_info->empty_block_rsv.space_info = space_info; |
3860 | fs_info->empty_block_rsv.priority = 10; | 3983 | fs_info->delayed_block_rsv.space_info = space_info; |
3861 | 3984 | ||
3862 | fs_info->extent_root->block_rsv = &fs_info->global_block_rsv; | 3985 | fs_info->extent_root->block_rsv = &fs_info->global_block_rsv; |
3863 | fs_info->csum_root->block_rsv = &fs_info->global_block_rsv; | 3986 | fs_info->csum_root->block_rsv = &fs_info->global_block_rsv; |
@@ -3865,10 +3988,6 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
3865 | fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; | 3988 | fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; |
3866 | fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; | 3989 | fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; |
3867 | 3990 | ||
3868 | btrfs_add_durable_block_rsv(fs_info, &fs_info->global_block_rsv); | ||
3869 | |||
3870 | btrfs_add_durable_block_rsv(fs_info, &fs_info->delalloc_block_rsv); | ||
3871 | |||
3872 | update_global_block_rsv(fs_info); | 3991 | update_global_block_rsv(fs_info); |
3873 | } | 3992 | } |
3874 | 3993 | ||
@@ -3881,37 +4000,8 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
3881 | WARN_ON(fs_info->trans_block_rsv.reserved > 0); | 4000 | WARN_ON(fs_info->trans_block_rsv.reserved > 0); |
3882 | WARN_ON(fs_info->chunk_block_rsv.size > 0); | 4001 | WARN_ON(fs_info->chunk_block_rsv.size > 0); |
3883 | WARN_ON(fs_info->chunk_block_rsv.reserved > 0); | 4002 | WARN_ON(fs_info->chunk_block_rsv.reserved > 0); |
3884 | } | 4003 | WARN_ON(fs_info->delayed_block_rsv.size > 0); |
3885 | 4004 | WARN_ON(fs_info->delayed_block_rsv.reserved > 0); | |
3886 | int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, | ||
3887 | struct btrfs_root *root, | ||
3888 | struct btrfs_block_rsv *rsv) | ||
3889 | { | ||
3890 | struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv; | ||
3891 | u64 num_bytes; | ||
3892 | int ret; | ||
3893 | |||
3894 | /* | ||
3895 | * Truncate should be freeing data, but give us 2 items just in case it | ||
3896 | * needs to use some space. We may want to be smarter about this in the | ||
3897 | * future. | ||
3898 | */ | ||
3899 | num_bytes = btrfs_calc_trans_metadata_size(root, 2); | ||
3900 | |||
3901 | /* We already have enough bytes, just return */ | ||
3902 | if (rsv->reserved >= num_bytes) | ||
3903 | return 0; | ||
3904 | |||
3905 | num_bytes -= rsv->reserved; | ||
3906 | |||
3907 | /* | ||
3908 | * You should have reserved enough space before hand to do this, so this | ||
3909 | * should not fail. | ||
3910 | */ | ||
3911 | ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes); | ||
3912 | BUG_ON(ret); | ||
3913 | |||
3914 | return 0; | ||
3915 | } | 4005 | } |
3916 | 4006 | ||
3917 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, | 4007 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, |
@@ -3920,9 +4010,7 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, | |||
3920 | if (!trans->bytes_reserved) | 4010 | if (!trans->bytes_reserved) |
3921 | return; | 4011 | return; |
3922 | 4012 | ||
3923 | BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv); | 4013 | btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved); |
3924 | btrfs_block_rsv_release(root, trans->block_rsv, | ||
3925 | trans->bytes_reserved); | ||
3926 | trans->bytes_reserved = 0; | 4014 | trans->bytes_reserved = 0; |
3927 | } | 4015 | } |
3928 | 4016 | ||
@@ -3964,11 +4052,19 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, | |||
3964 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | 4052 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); |
3965 | } | 4053 | } |
3966 | 4054 | ||
4055 | /** | ||
4056 | * drop_outstanding_extent - drop an outstanding extent | ||
4057 | * @inode: the inode we're dropping the extent for | ||
4058 | * | ||
4059 | * This is called when we are freeing up an outstanding extent, either | ||
4060 | * after an error or after an extent is written. This will return the number of | ||
4061 | * reserved extents that need to be freed. This must be called with | ||
4062 | * BTRFS_I(inode)->lock held. | ||
4063 | */ | ||
3967 | static unsigned drop_outstanding_extent(struct inode *inode) | 4064 | static unsigned drop_outstanding_extent(struct inode *inode) |
3968 | { | 4065 | { |
3969 | unsigned dropped_extents = 0; | 4066 | unsigned dropped_extents = 0; |
3970 | 4067 | ||
3971 | spin_lock(&BTRFS_I(inode)->lock); | ||
3972 | BUG_ON(!BTRFS_I(inode)->outstanding_extents); | 4068 | BUG_ON(!BTRFS_I(inode)->outstanding_extents); |
3973 | BTRFS_I(inode)->outstanding_extents--; | 4069 | BTRFS_I(inode)->outstanding_extents--; |
3974 | 4070 | ||
@@ -3978,19 +4074,70 @@ static unsigned drop_outstanding_extent(struct inode *inode) | |||
3978 | */ | 4074 | */ |
3979 | if (BTRFS_I(inode)->outstanding_extents >= | 4075 | if (BTRFS_I(inode)->outstanding_extents >= |
3980 | BTRFS_I(inode)->reserved_extents) | 4076 | BTRFS_I(inode)->reserved_extents) |
3981 | goto out; | 4077 | return 0; |
3982 | 4078 | ||
3983 | dropped_extents = BTRFS_I(inode)->reserved_extents - | 4079 | dropped_extents = BTRFS_I(inode)->reserved_extents - |
3984 | BTRFS_I(inode)->outstanding_extents; | 4080 | BTRFS_I(inode)->outstanding_extents; |
3985 | BTRFS_I(inode)->reserved_extents -= dropped_extents; | 4081 | BTRFS_I(inode)->reserved_extents -= dropped_extents; |
3986 | out: | ||
3987 | spin_unlock(&BTRFS_I(inode)->lock); | ||
3988 | return dropped_extents; | 4082 | return dropped_extents; |
3989 | } | 4083 | } |
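With the locking hoisted to the callers, the bookkeeping itself is simple; a runnable model (field widths and names simplified): reservations are only released once the count of reserved extents exceeds what is still outstanding.

#include <stdio.h>

struct inode_model {
	unsigned outstanding;	/* extents still in flight */
	unsigned reserved;	/* extents holding a reservation */
};

/* caller is assumed to hold the inode's spinlock, as in the kernel */
static unsigned drop_outstanding(struct inode_model *i)
{
	unsigned dropped;

	i->outstanding--;
	if (i->outstanding >= i->reserved)
		return 0;	/* every reservation still in use */
	dropped = i->reserved - i->outstanding;
	i->reserved -= dropped;
	return dropped;		/* reservations the caller can free */
}

int main(void)
{
	struct inode_model ino = { .outstanding = 2, .reserved = 4 };

	printf("freed %u reserved extents\n", drop_outstanding(&ino));
	return 0;
}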
3990 | 4084 | ||
3991 | static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) | 4085 | /** |
4086 | * calc_csum_metadata_size - return the amount of metadata space that must be | ||
4087 | * reserved/freed for the given bytes. | ||
4088 | * @inode: the inode we're manipulating | ||
4089 | * @num_bytes: the number of bytes in question | ||
4090 | * @reserve: 1 if we are reserving space, 0 if we are freeing space | ||
4091 | * | ||
4092 | * This adjusts the number of csum_bytes in the inode and then returns the | ||
4093 | * correct amount of metadata that must either be reserved or freed. We | ||
4094 | * calculate how many checksums we can fit into one leaf and then divide the | ||
4095 | * number of bytes that will need to be checksummed by this value to figure out | ||
4096 | * how many checksums will be required. If we are adding bytes then the number | ||
4097 | * may go up and we will return the number of additional bytes that must be | ||
4098 | * reserved. If it is going down we will return the number of bytes that must | ||
4099 | * be freed. | ||
4100 | * | ||
4101 | * This must be called with BTRFS_I(inode)->lock held. | ||
4102 | */ | ||
4103 | static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes, | ||
4104 | int reserve) | ||
3992 | { | 4105 | { |
3993 | return num_bytes >>= 3; | 4106 | struct btrfs_root *root = BTRFS_I(inode)->root; |
4107 | u64 csum_size; | ||
4108 | int num_csums_per_leaf; | ||
4109 | int num_csums; | ||
4110 | int old_csums; | ||
4111 | |||
4112 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM && | ||
4113 | BTRFS_I(inode)->csum_bytes == 0) | ||
4114 | return 0; | ||
4115 | |||
4116 | old_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize); | ||
4117 | if (reserve) | ||
4118 | BTRFS_I(inode)->csum_bytes += num_bytes; | ||
4119 | else | ||
4120 | BTRFS_I(inode)->csum_bytes -= num_bytes; | ||
4121 | csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item); | ||
4122 | num_csums_per_leaf = (int)div64_u64(csum_size, | ||
4123 | sizeof(struct btrfs_csum_item) + | ||
4124 | sizeof(struct btrfs_disk_key)); | ||
4125 | num_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize); | ||
4126 | num_csums = num_csums + num_csums_per_leaf - 1; | ||
4127 | num_csums = num_csums / num_csums_per_leaf; | ||
4128 | |||
4129 | old_csums = old_csums + num_csums_per_leaf - 1; | ||
4130 | old_csums = old_csums / num_csums_per_leaf; | ||
4131 | |||
4132 | /* No change, no need to reserve more */ | ||
4133 | if (old_csums == num_csums) | ||
4134 | return 0; | ||
4135 | |||
4136 | if (reserve) | ||
4137 | return btrfs_calc_trans_metadata_size(root, | ||
4138 | num_csums - old_csums); | ||
4139 | |||
4140 | return btrfs_calc_trans_metadata_size(root, old_csums - num_csums); | ||
3994 | } | 4141 | } |
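The sizing boils down to two ceil-divisions: bytes into per-sector checksums, then checksums into leaves, and only a change in the leaf count costs (or returns) metadata. A standalone sketch with purely illustrative geometry (the real sectorsize and per-leaf capacity come from the superblock and leaf layout, not these constants):

	#include <stdio.h>
	#include <stdint.h>

	static const uint64_t sectorsize = 4096;    /* assumed for illustration */
	static const uint64_t csums_per_leaf = 700; /* assumed for illustration */

	/* round up: a partially filled leaf must still be reserved */
	static uint64_t csum_leaves(uint64_t csum_bytes)
	{
		uint64_t num_csums = csum_bytes / sectorsize;
		return (num_csums + csums_per_leaf - 1) / csums_per_leaf;
	}

	int main(void)
	{
		uint64_t before = 8 * 1024 * 1024;     /* csum_bytes tracked so far */
		uint64_t after = before + 1024 * 1024; /* reserving 1MiB more */

		/* 2048 csums -> 3 leaves, 2304 csums -> 4 leaves: exactly one
		 * extra leaf's worth of metadata gets added to the reservation */
		printf("leaves: %llu -> %llu\n",
		       (unsigned long long)csum_leaves(before),
		       (unsigned long long)csum_leaves(after));
		return 0;
	}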
3995 | 4142 | ||
3996 | int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | 4143 | int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) |
@@ -3999,9 +4146,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
3999 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; | 4146 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; |
4000 | u64 to_reserve = 0; | 4147 | u64 to_reserve = 0; |
4001 | unsigned nr_extents = 0; | 4148 | unsigned nr_extents = 0; |
4149 | int flush = 1; | ||
4002 | int ret; | 4150 | int ret; |
4003 | 4151 | ||
4004 | if (btrfs_transaction_in_commit(root->fs_info)) | 4152 | if (btrfs_is_free_space_inode(root, inode)) |
4153 | flush = 0; | ||
4154 | |||
4155 | if (flush && btrfs_transaction_in_commit(root->fs_info)) | ||
4005 | schedule_timeout(1); | 4156 | schedule_timeout(1); |
4006 | 4157 | ||
4007 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 4158 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
@@ -4017,18 +4168,29 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
4017 | 4168 | ||
4018 | to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); | 4169 | to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); |
4019 | } | 4170 | } |
4171 | to_reserve += calc_csum_metadata_size(inode, num_bytes, 1); | ||
4020 | spin_unlock(&BTRFS_I(inode)->lock); | 4172 | spin_unlock(&BTRFS_I(inode)->lock); |
4021 | 4173 | ||
4022 | to_reserve += calc_csum_metadata_size(inode, num_bytes); | 4174 | ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); |
4023 | ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); | ||
4024 | if (ret) { | 4175 | if (ret) { |
4176 | u64 to_free = 0; | ||
4025 | unsigned dropped; | 4177 | unsigned dropped; |
4178 | |||
4179 | spin_lock(&BTRFS_I(inode)->lock); | ||
4180 | dropped = drop_outstanding_extent(inode); | ||
4181 | to_free = calc_csum_metadata_size(inode, num_bytes, 0); | ||
4182 | spin_unlock(&BTRFS_I(inode)->lock); | ||
4183 | to_free += btrfs_calc_trans_metadata_size(root, dropped); | ||
4184 | |||
4026 | /* | 4185 | /* |
4027 | * We don't need the return value since our reservation failed, | 4186 | * Somebody could have come in and twiddled with the |
4028 | * we just need to clean up our counter. | 4187 | * reservation, so if we have to free more than we would have |
4188 | * reserved from this reservation, go ahead and release those | ||
4189 | * bytes. | ||
4029 | */ | 4190 | */ |
4030 | dropped = drop_outstanding_extent(inode); | 4191 | to_free -= to_reserve; |
4031 | WARN_ON(dropped > 1); | 4192 | if (to_free) |
4193 | btrfs_block_rsv_release(root, block_rsv, to_free); | ||
4032 | return ret; | 4194 | return ret; |
4033 | } | 4195 | } |
4034 | 4196 | ||
@@ -4037,6 +4199,15 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
4037 | return 0; | 4199 | return 0; |
4038 | } | 4200 | } |
4039 | 4201 | ||
4202 | /** | ||
4203 | * btrfs_delalloc_release_metadata - release a metadata reservation for an inode | ||
4204 | * @inode: the inode to release the reservation for | ||
4205 | * @num_bytes: the number of bytes we're releasing | ||
4206 | * | ||
4207 | * This will release the metadata reservation for an inode. This can be called | ||
4208 | * once we complete IO for a given set of bytes to release their metadata | ||
4209 | * reservations. | ||
4210 | */ | ||
4040 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | 4211 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) |
4041 | { | 4212 | { |
4042 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4213 | struct btrfs_root *root = BTRFS_I(inode)->root; |
@@ -4044,9 +4215,11 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | |||
4044 | unsigned dropped; | 4215 | unsigned dropped; |
4045 | 4216 | ||
4046 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 4217 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
4218 | spin_lock(&BTRFS_I(inode)->lock); | ||
4047 | dropped = drop_outstanding_extent(inode); | 4219 | dropped = drop_outstanding_extent(inode); |
4048 | 4220 | ||
4049 | to_free = calc_csum_metadata_size(inode, num_bytes); | 4221 | to_free = calc_csum_metadata_size(inode, num_bytes, 0); |
4222 | spin_unlock(&BTRFS_I(inode)->lock); | ||
4050 | if (dropped > 0) | 4223 | if (dropped > 0) |
4051 | to_free += btrfs_calc_trans_metadata_size(root, dropped); | 4224 | to_free += btrfs_calc_trans_metadata_size(root, dropped); |
4052 | 4225 | ||
@@ -4054,6 +4227,21 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | |||
4054 | to_free); | 4227 | to_free); |
4055 | } | 4228 | } |
4056 | 4229 | ||
4230 | /** | ||
4231 | * btrfs_delalloc_reserve_space - reserve data and metadata space for delalloc | ||
4232 | * @inode: inode we're writing to | ||
4233 | * @num_bytes: the number of bytes we want to allocate | ||
4234 | * | ||
4235 | * This will do the following things: | ||
4236 | * | ||
4237 | * o reserve space in the data space info for num_bytes | ||
4238 | * o reserve space in the metadata space info based on number of outstanding | ||
4239 | * extents and how many csums will be needed | ||
4240 | * o add to the inode's ->delalloc_bytes | ||
4241 | * o add it to the fs_info's delalloc inodes list. | ||
4242 | * | ||
4243 | * This will return 0 for success and -ENOSPC if there is no space left. | ||
4244 | */ | ||
4057 | int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes) | 4245 | int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes) |
4058 | { | 4246 | { |
4059 | int ret; | 4247 | int ret; |
@@ -4071,6 +4259,19 @@ int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes) | |||
4071 | return 0; | 4259 | return 0; |
4072 | } | 4260 | } |
4073 | 4261 | ||
4262 | /** | ||
4263 | * btrfs_delalloc_release_space - release data and metadata space for delalloc | ||
4264 | * @inode: inode we're releasing space for | ||
4265 | * @num_bytes: the number of bytes we want to free up | ||
4266 | * | ||
4267 | * This must be matched with a call to btrfs_delalloc_reserve_space. This is | ||
4268 | * called in the case that we don't need the metadata AND data reservations | ||
4269 | * anymore, such as when there is an error or we insert an inline extent. | ||
4270 | * | ||
4271 | * This function will release the metadata space that was not used and will | ||
4272 | * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes | ||
4273 | * list if there are no delalloc bytes left. | ||
4274 | */ | ||
4074 | void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes) | 4275 | void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes) |
4075 | { | 4276 | { |
4076 | btrfs_delalloc_release_metadata(inode, num_bytes); | 4277 | btrfs_delalloc_release_metadata(inode, num_bytes); |
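Taken together, the two helpers above bracket the buffered write path: reserve data plus metadata up front, and release both only when nothing gets dirtied. A hedged sketch of the intended pairing (do_the_copy() is a hypothetical stand-in for the real copy loop):

	int example_write(struct inode *inode, u64 len)
	{
		int ret;

		ret = btrfs_delalloc_reserve_space(inode, len);
		if (ret)	/* -ENOSPC: nothing was reserved */
			return ret;

		ret = do_the_copy(inode, len);	/* hypothetical helper */
		if (ret) {
			/* error path: hand back data AND metadata reservations */
			btrfs_delalloc_release_space(inode, len);
			return ret;
		}
		/* on success, btrfs_delalloc_release_metadata() runs later,
		 * once IO for these bytes completes */
		return 0;
	}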
@@ -4090,12 +4291,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
4090 | 4291 | ||
4091 | /* block accounting for super block */ | 4292 | /* block accounting for super block */ |
4092 | spin_lock(&info->delalloc_lock); | 4293 | spin_lock(&info->delalloc_lock); |
4093 | old_val = btrfs_super_bytes_used(&info->super_copy); | 4294 | old_val = btrfs_super_bytes_used(info->super_copy); |
4094 | if (alloc) | 4295 | if (alloc) |
4095 | old_val += num_bytes; | 4296 | old_val += num_bytes; |
4096 | else | 4297 | else |
4097 | old_val -= num_bytes; | 4298 | old_val -= num_bytes; |
4098 | btrfs_set_super_bytes_used(&info->super_copy, old_val); | 4299 | btrfs_set_super_bytes_used(info->super_copy, old_val); |
4099 | spin_unlock(&info->delalloc_lock); | 4300 | spin_unlock(&info->delalloc_lock); |
4100 | 4301 | ||
4101 | while (total) { | 4302 | while (total) { |
@@ -4123,7 +4324,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
4123 | spin_lock(&cache->space_info->lock); | 4324 | spin_lock(&cache->space_info->lock); |
4124 | spin_lock(&cache->lock); | 4325 | spin_lock(&cache->lock); |
4125 | 4326 | ||
4126 | if (btrfs_super_cache_generation(&info->super_copy) != 0 && | 4327 | if (btrfs_test_opt(root, SPACE_CACHE) && |
4127 | cache->disk_cache_state < BTRFS_DC_CLEAR) | 4328 | cache->disk_cache_state < BTRFS_DC_CLEAR) |
4128 | cache->disk_cache_state = BTRFS_DC_CLEAR; | 4329 | cache->disk_cache_state = BTRFS_DC_CLEAR; |
4129 | 4330 | ||
@@ -4135,7 +4336,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
4135 | btrfs_set_block_group_used(&cache->item, old_val); | 4336 | btrfs_set_block_group_used(&cache->item, old_val); |
4136 | cache->reserved -= num_bytes; | 4337 | cache->reserved -= num_bytes; |
4137 | cache->space_info->bytes_reserved -= num_bytes; | 4338 | cache->space_info->bytes_reserved -= num_bytes; |
4138 | cache->space_info->reservation_progress++; | ||
4139 | cache->space_info->bytes_used += num_bytes; | 4339 | cache->space_info->bytes_used += num_bytes; |
4140 | cache->space_info->disk_used += num_bytes * factor; | 4340 | cache->space_info->disk_used += num_bytes * factor; |
4141 | spin_unlock(&cache->lock); | 4341 | spin_unlock(&cache->lock); |
@@ -4187,7 +4387,6 @@ static int pin_down_extent(struct btrfs_root *root, | |||
4187 | if (reserved) { | 4387 | if (reserved) { |
4188 | cache->reserved -= num_bytes; | 4388 | cache->reserved -= num_bytes; |
4189 | cache->space_info->bytes_reserved -= num_bytes; | 4389 | cache->space_info->bytes_reserved -= num_bytes; |
4190 | cache->space_info->reservation_progress++; | ||
4191 | } | 4390 | } |
4192 | spin_unlock(&cache->lock); | 4391 | spin_unlock(&cache->lock); |
4193 | spin_unlock(&cache->space_info->lock); | 4392 | spin_unlock(&cache->space_info->lock); |
@@ -4215,45 +4414,82 @@ int btrfs_pin_extent(struct btrfs_root *root, | |||
4215 | } | 4414 | } |
4216 | 4415 | ||
4217 | /* | 4416 | /* |
4218 | * update size of reserved extents. this function may return -EAGAIN | 4417 | * this function must be called within a transaction |
4219 | * if 'reserve' is true or 'sinfo' is false. | 4418 | */ |
4419 | int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, | ||
4420 | struct btrfs_root *root, | ||
4421 | u64 bytenr, u64 num_bytes) | ||
4422 | { | ||
4423 | struct btrfs_block_group_cache *cache; | ||
4424 | |||
4425 | cache = btrfs_lookup_block_group(root->fs_info, bytenr); | ||
4426 | BUG_ON(!cache); | ||
4427 | |||
4428 | /* | ||
4429 | * pull in the free space cache (if any) so that our pin | ||
4430 | * removes the free space from the cache. We have load_only set | ||
4431 | * to one because the slow code to read in the free extents does check | ||
4432 | * the pinned extents. | ||
4433 | */ | ||
4434 | cache_block_group(cache, trans, root, 1); | ||
4435 | |||
4436 | pin_down_extent(root, cache, bytenr, num_bytes, 0); | ||
4437 | |||
4438 | /* remove us from the free space cache (if we're there at all) */ | ||
4439 | btrfs_remove_free_space(cache, bytenr, num_bytes); | ||
4440 | btrfs_put_block_group(cache); | ||
4441 | return 0; | ||
4442 | } | ||
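During tree-log replay the extent is already referenced on disk, so it has to be pinned (and pulled out of the free space cache) before the allocator could hand it out again. A hedged usage sketch inside the replay transaction:

	/* bytenr/num_bytes describe an extent referenced by the log */
	ret = btrfs_pin_extent_for_log_replay(trans, root, bytenr, num_bytes);
	BUG_ON(ret);	/* the helper currently always returns 0 */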
4443 | |||
4444 | /** | ||
4445 | * btrfs_update_reserved_bytes - update the block_group and space info counters | ||
4446 | * @cache: The cache we are manipulating | ||
4447 | * @num_bytes: The number of bytes in question | ||
4448 | * @reserve: One of the reservation enums | ||
4449 | * | ||
4450 | * This is called by the allocator when it reserves space, or by somebody who is | ||
4451 | * freeing space that was never actually used on disk. For example, if you | ||
4452 | * reserve some space for a new leaf in transaction A and before transaction A | ||
4453 | * commits you free that leaf, you call this with reserve set to 0 in order to | ||
4454 | * clear the reservation. | ||
4455 | * | ||
4456 | * Metadata reservations should be made with RESERVE_ALLOC so we do the proper | ||
4457 | * ENOSPC accounting. For data we handle the reservation through clearing the | ||
4458 | * delalloc bits in the io_tree. We have to do this since we could end up | ||
4459 | * allocating less disk space than the amount of data we have reserved in the | ||
4460 | * case of compression. | ||
4461 | * | ||
4462 | * If this is a reservation and the block group has become read only we cannot | ||
4463 | * make the reservation and return -EAGAIN, otherwise this function always | ||
4464 | * succeeds. | ||
4220 | */ | 4465 | */ |
4221 | int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, | 4466 | static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, |
4222 | u64 num_bytes, int reserve, int sinfo) | 4467 | u64 num_bytes, int reserve) |
4223 | { | 4468 | { |
4469 | struct btrfs_space_info *space_info = cache->space_info; | ||
4224 | int ret = 0; | 4470 | int ret = 0; |
4225 | if (sinfo) { | 4471 | spin_lock(&space_info->lock); |
4226 | struct btrfs_space_info *space_info = cache->space_info; | 4472 | spin_lock(&cache->lock); |
4227 | spin_lock(&space_info->lock); | 4473 | if (reserve != RESERVE_FREE) { |
4228 | spin_lock(&cache->lock); | ||
4229 | if (reserve) { | ||
4230 | if (cache->ro) { | ||
4231 | ret = -EAGAIN; | ||
4232 | } else { | ||
4233 | cache->reserved += num_bytes; | ||
4234 | space_info->bytes_reserved += num_bytes; | ||
4235 | } | ||
4236 | } else { | ||
4237 | if (cache->ro) | ||
4238 | space_info->bytes_readonly += num_bytes; | ||
4239 | cache->reserved -= num_bytes; | ||
4240 | space_info->bytes_reserved -= num_bytes; | ||
4241 | space_info->reservation_progress++; | ||
4242 | } | ||
4243 | spin_unlock(&cache->lock); | ||
4244 | spin_unlock(&space_info->lock); | ||
4245 | } else { | ||
4246 | spin_lock(&cache->lock); | ||
4247 | if (cache->ro) { | 4474 | if (cache->ro) { |
4248 | ret = -EAGAIN; | 4475 | ret = -EAGAIN; |
4249 | } else { | 4476 | } else { |
4250 | if (reserve) | 4477 | cache->reserved += num_bytes; |
4251 | cache->reserved += num_bytes; | 4478 | space_info->bytes_reserved += num_bytes; |
4252 | else | 4479 | if (reserve == RESERVE_ALLOC) { |
4253 | cache->reserved -= num_bytes; | 4480 | BUG_ON(space_info->bytes_may_use < num_bytes); |
4481 | space_info->bytes_may_use -= num_bytes; | ||
4482 | } | ||
4254 | } | 4483 | } |
4255 | spin_unlock(&cache->lock); | 4484 | } else { |
4485 | if (cache->ro) | ||
4486 | space_info->bytes_readonly += num_bytes; | ||
4487 | cache->reserved -= num_bytes; | ||
4488 | space_info->bytes_reserved -= num_bytes; | ||
4489 | space_info->reservation_progress++; | ||
4256 | } | 4490 | } |
4491 | spin_unlock(&cache->lock); | ||
4492 | spin_unlock(&space_info->lock); | ||
4257 | return ret; | 4493 | return ret; |
4258 | } | 4494 | } |
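The reservation enum itself is defined outside this hunk; a sketch of its assumed shape and the allocator-side usage (the numeric values and the retry helper are assumptions, not part of this hunk):

	/* assumed definition, not shown in this hunk */
	enum {
		RESERVE_FREE = 0,		/* undo an unused reservation */
		RESERVE_ALLOC = 1,		/* reserve and consume bytes_may_use */
		RESERVE_ALLOC_NO_ACCOUNT = 2,	/* reserve; data is accounted via
						 * the delalloc io_tree instead */
	};

	/* metadata allocation with full ENOSPC accounting */
	ret = btrfs_update_reserved_bytes(block_group, num_bytes, RESERVE_ALLOC);
	if (ret == -EAGAIN) {
		/* block group went read-only; put the space back and look elsewhere */
		btrfs_add_free_space(block_group, offset, num_bytes);
	}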
4259 | 4495 | ||
@@ -4319,13 +4555,8 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) | |||
4319 | spin_lock(&cache->lock); | 4555 | spin_lock(&cache->lock); |
4320 | cache->pinned -= len; | 4556 | cache->pinned -= len; |
4321 | cache->space_info->bytes_pinned -= len; | 4557 | cache->space_info->bytes_pinned -= len; |
4322 | if (cache->ro) { | 4558 | if (cache->ro) |
4323 | cache->space_info->bytes_readonly += len; | 4559 | cache->space_info->bytes_readonly += len; |
4324 | } else if (cache->reserved_pinned > 0) { | ||
4325 | len = min(len, cache->reserved_pinned); | ||
4326 | cache->reserved_pinned -= len; | ||
4327 | cache->space_info->bytes_reserved += len; | ||
4328 | } | ||
4329 | spin_unlock(&cache->lock); | 4560 | spin_unlock(&cache->lock); |
4330 | spin_unlock(&cache->space_info->lock); | 4561 | spin_unlock(&cache->space_info->lock); |
4331 | } | 4562 | } |
@@ -4340,11 +4571,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
4340 | { | 4571 | { |
4341 | struct btrfs_fs_info *fs_info = root->fs_info; | 4572 | struct btrfs_fs_info *fs_info = root->fs_info; |
4342 | struct extent_io_tree *unpin; | 4573 | struct extent_io_tree *unpin; |
4343 | struct btrfs_block_rsv *block_rsv; | ||
4344 | struct btrfs_block_rsv *next_rsv; | ||
4345 | u64 start; | 4574 | u64 start; |
4346 | u64 end; | 4575 | u64 end; |
4347 | int idx; | ||
4348 | int ret; | 4576 | int ret; |
4349 | 4577 | ||
4350 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) | 4578 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) |
@@ -4367,30 +4595,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
4367 | cond_resched(); | 4595 | cond_resched(); |
4368 | } | 4596 | } |
4369 | 4597 | ||
4370 | mutex_lock(&fs_info->durable_block_rsv_mutex); | ||
4371 | list_for_each_entry_safe(block_rsv, next_rsv, | ||
4372 | &fs_info->durable_block_rsv_list, list) { | ||
4373 | |||
4374 | idx = trans->transid & 0x1; | ||
4375 | if (block_rsv->freed[idx] > 0) { | ||
4376 | block_rsv_add_bytes(block_rsv, | ||
4377 | block_rsv->freed[idx], 0); | ||
4378 | block_rsv->freed[idx] = 0; | ||
4379 | } | ||
4380 | if (atomic_read(&block_rsv->usage) == 0) { | ||
4381 | btrfs_block_rsv_release(root, block_rsv, (u64)-1); | ||
4382 | |||
4383 | if (block_rsv->freed[0] == 0 && | ||
4384 | block_rsv->freed[1] == 0) { | ||
4385 | list_del_init(&block_rsv->list); | ||
4386 | kfree(block_rsv); | ||
4387 | } | ||
4388 | } else { | ||
4389 | btrfs_block_rsv_release(root, block_rsv, 0); | ||
4390 | } | ||
4391 | } | ||
4392 | mutex_unlock(&fs_info->durable_block_rsv_mutex); | ||
4393 | |||
4394 | return 0; | 4598 | return 0; |
4395 | } | 4599 | } |
4396 | 4600 | ||
@@ -4668,7 +4872,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
4668 | struct extent_buffer *buf, | 4872 | struct extent_buffer *buf, |
4669 | u64 parent, int last_ref) | 4873 | u64 parent, int last_ref) |
4670 | { | 4874 | { |
4671 | struct btrfs_block_rsv *block_rsv; | ||
4672 | struct btrfs_block_group_cache *cache = NULL; | 4875 | struct btrfs_block_group_cache *cache = NULL; |
4673 | int ret; | 4876 | int ret; |
4674 | 4877 | ||
@@ -4683,64 +4886,24 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
4683 | if (!last_ref) | 4886 | if (!last_ref) |
4684 | return; | 4887 | return; |
4685 | 4888 | ||
4686 | block_rsv = get_block_rsv(trans, root); | ||
4687 | cache = btrfs_lookup_block_group(root->fs_info, buf->start); | 4889 | cache = btrfs_lookup_block_group(root->fs_info, buf->start); |
4688 | if (block_rsv->space_info != cache->space_info) | ||
4689 | goto out; | ||
4690 | 4890 | ||
4691 | if (btrfs_header_generation(buf) == trans->transid) { | 4891 | if (btrfs_header_generation(buf) == trans->transid) { |
4692 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | 4892 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { |
4693 | ret = check_ref_cleanup(trans, root, buf->start); | 4893 | ret = check_ref_cleanup(trans, root, buf->start); |
4694 | if (!ret) | 4894 | if (!ret) |
4695 | goto pin; | 4895 | goto out; |
4696 | } | 4896 | } |
4697 | 4897 | ||
4698 | if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | 4898 | if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { |
4699 | pin_down_extent(root, cache, buf->start, buf->len, 1); | 4899 | pin_down_extent(root, cache, buf->start, buf->len, 1); |
4700 | goto pin; | 4900 | goto out; |
4701 | } | 4901 | } |
4702 | 4902 | ||
4703 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); | 4903 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); |
4704 | 4904 | ||
4705 | btrfs_add_free_space(cache, buf->start, buf->len); | 4905 | btrfs_add_free_space(cache, buf->start, buf->len); |
4706 | ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0); | 4906 | btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE); |
4707 | if (ret == -EAGAIN) { | ||
4708 | /* block group became read-only */ | ||
4709 | btrfs_update_reserved_bytes(cache, buf->len, 0, 1); | ||
4710 | goto out; | ||
4711 | } | ||
4712 | |||
4713 | ret = 1; | ||
4714 | spin_lock(&block_rsv->lock); | ||
4715 | if (block_rsv->reserved < block_rsv->size) { | ||
4716 | block_rsv->reserved += buf->len; | ||
4717 | ret = 0; | ||
4718 | } | ||
4719 | spin_unlock(&block_rsv->lock); | ||
4720 | |||
4721 | if (ret) { | ||
4722 | spin_lock(&cache->space_info->lock); | ||
4723 | cache->space_info->bytes_reserved -= buf->len; | ||
4724 | cache->space_info->reservation_progress++; | ||
4725 | spin_unlock(&cache->space_info->lock); | ||
4726 | } | ||
4727 | goto out; | ||
4728 | } | ||
4729 | pin: | ||
4730 | if (block_rsv->durable && !cache->ro) { | ||
4731 | ret = 0; | ||
4732 | spin_lock(&cache->lock); | ||
4733 | if (!cache->ro) { | ||
4734 | cache->reserved_pinned += buf->len; | ||
4735 | ret = 1; | ||
4736 | } | ||
4737 | spin_unlock(&cache->lock); | ||
4738 | |||
4739 | if (ret) { | ||
4740 | spin_lock(&block_rsv->lock); | ||
4741 | block_rsv->freed[trans->transid & 0x1] += buf->len; | ||
4742 | spin_unlock(&block_rsv->lock); | ||
4743 | } | ||
4744 | } | 4907 | } |
4745 | out: | 4908 | out: |
4746 | /* | 4909 | /* |
@@ -4883,10 +5046,13 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
4883 | int last_ptr_loop = 0; | 5046 | int last_ptr_loop = 0; |
4884 | int loop = 0; | 5047 | int loop = 0; |
4885 | int index = 0; | 5048 | int index = 0; |
5049 | int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ? | ||
5050 | RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC; | ||
4886 | bool found_uncached_bg = false; | 5051 | bool found_uncached_bg = false; |
4887 | bool failed_cluster_refill = false; | 5052 | bool failed_cluster_refill = false; |
4888 | bool failed_alloc = false; | 5053 | bool failed_alloc = false; |
4889 | bool use_cluster = true; | 5054 | bool use_cluster = true; |
5055 | bool have_caching_bg = false; | ||
4890 | u64 ideal_cache_percent = 0; | 5056 | u64 ideal_cache_percent = 0; |
4891 | u64 ideal_cache_offset = 0; | 5057 | u64 ideal_cache_offset = 0; |
4892 | 5058 | ||
@@ -4969,6 +5135,7 @@ ideal_cache: | |||
4969 | } | 5135 | } |
4970 | } | 5136 | } |
4971 | search: | 5137 | search: |
5138 | have_caching_bg = false; | ||
4972 | down_read(&space_info->groups_sem); | 5139 | down_read(&space_info->groups_sem); |
4973 | list_for_each_entry(block_group, &space_info->block_groups[index], | 5140 | list_for_each_entry(block_group, &space_info->block_groups[index], |
4974 | list) { | 5141 | list) { |
@@ -5177,6 +5344,8 @@ refill_cluster: | |||
5177 | failed_alloc = true; | 5344 | failed_alloc = true; |
5178 | goto have_block_group; | 5345 | goto have_block_group; |
5179 | } else if (!offset) { | 5346 | } else if (!offset) { |
5347 | if (!cached) | ||
5348 | have_caching_bg = true; | ||
5180 | goto loop; | 5349 | goto loop; |
5181 | } | 5350 | } |
5182 | checks: | 5351 | checks: |
@@ -5202,8 +5371,8 @@ checks: | |||
5202 | search_start - offset); | 5371 | search_start - offset); |
5203 | BUG_ON(offset > search_start); | 5372 | BUG_ON(offset > search_start); |
5204 | 5373 | ||
5205 | ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1, | 5374 | ret = btrfs_update_reserved_bytes(block_group, num_bytes, |
5206 | (data & BTRFS_BLOCK_GROUP_DATA)); | 5375 | alloc_type); |
5207 | if (ret == -EAGAIN) { | 5376 | if (ret == -EAGAIN) { |
5208 | btrfs_add_free_space(block_group, offset, num_bytes); | 5377 | btrfs_add_free_space(block_group, offset, num_bytes); |
5209 | goto loop; | 5378 | goto loop; |
@@ -5227,6 +5396,9 @@ loop: | |||
5227 | } | 5396 | } |
5228 | up_read(&space_info->groups_sem); | 5397 | up_read(&space_info->groups_sem); |
5229 | 5398 | ||
5399 | if (!ins->objectid && loop >= LOOP_CACHING_WAIT && have_caching_bg) | ||
5400 | goto search; | ||
5401 | |||
5230 | if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES) | 5402 | if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES) |
5231 | goto search; | 5403 | goto search; |
5232 | 5404 | ||
@@ -5325,7 +5497,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | |||
5325 | int index = 0; | 5497 | int index = 0; |
5326 | 5498 | ||
5327 | spin_lock(&info->lock); | 5499 | spin_lock(&info->lock); |
5328 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", | 5500 | printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n", |
5501 | (unsigned long long)info->flags, | ||
5329 | (unsigned long long)(info->total_bytes - info->bytes_used - | 5502 | (unsigned long long)(info->total_bytes - info->bytes_used - |
5330 | info->bytes_pinned - info->bytes_reserved - | 5503 | info->bytes_pinned - info->bytes_reserved - |
5331 | info->bytes_readonly), | 5504 | info->bytes_readonly), |
@@ -5411,7 +5584,8 @@ again: | |||
5411 | return ret; | 5584 | return ret; |
5412 | } | 5585 | } |
5413 | 5586 | ||
5414 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | 5587 | static int __btrfs_free_reserved_extent(struct btrfs_root *root, |
5588 | u64 start, u64 len, int pin) | ||
5415 | { | 5589 | { |
5416 | struct btrfs_block_group_cache *cache; | 5590 | struct btrfs_block_group_cache *cache; |
5417 | int ret = 0; | 5591 | int ret = 0; |
@@ -5426,8 +5600,12 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
5426 | if (btrfs_test_opt(root, DISCARD)) | 5600 | if (btrfs_test_opt(root, DISCARD)) |
5427 | ret = btrfs_discard_extent(root, start, len, NULL); | 5601 | ret = btrfs_discard_extent(root, start, len, NULL); |
5428 | 5602 | ||
5429 | btrfs_add_free_space(cache, start, len); | 5603 | if (pin) |
5430 | btrfs_update_reserved_bytes(cache, len, 0, 1); | 5604 | pin_down_extent(root, cache, start, len, 1); |
5605 | else { | ||
5606 | btrfs_add_free_space(cache, start, len); | ||
5607 | btrfs_update_reserved_bytes(cache, len, RESERVE_FREE); | ||
5608 | } | ||
5431 | btrfs_put_block_group(cache); | 5609 | btrfs_put_block_group(cache); |
5432 | 5610 | ||
5433 | trace_btrfs_reserved_extent_free(root, start, len); | 5611 | trace_btrfs_reserved_extent_free(root, start, len); |
@@ -5435,6 +5613,18 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
5435 | return ret; | 5613 | return ret; |
5436 | } | 5614 | } |
5437 | 5615 | ||
5616 | int btrfs_free_reserved_extent(struct btrfs_root *root, | ||
5617 | u64 start, u64 len) | ||
5618 | { | ||
5619 | return __btrfs_free_reserved_extent(root, start, len, 0); | ||
5620 | } | ||
5621 | |||
5622 | int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, | ||
5623 | u64 start, u64 len) | ||
5624 | { | ||
5625 | return __btrfs_free_reserved_extent(root, start, len, 1); | ||
5626 | } | ||
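The pin variant exists for log replay: bytes freed there must stay pinned until the transaction commits instead of returning to the free space cache. A sketch of the choice, using only the two wrappers above:

	/* normal path: the extent goes straight back to free space */
	btrfs_free_reserved_extent(root, start, len);

	/* log replay: keep it pinned until the replay transaction commits */
	btrfs_free_and_pin_reserved_extent(root, start, len);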
5627 | |||
5438 | static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | 5628 | static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, |
5439 | struct btrfs_root *root, | 5629 | struct btrfs_root *root, |
5440 | u64 parent, u64 root_objectid, | 5630 | u64 parent, u64 root_objectid, |
@@ -5630,7 +5820,8 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
5630 | put_caching_control(caching_ctl); | 5820 | put_caching_control(caching_ctl); |
5631 | } | 5821 | } |
5632 | 5822 | ||
5633 | ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1); | 5823 | ret = btrfs_update_reserved_bytes(block_group, ins->offset, |
5824 | RESERVE_ALLOC_NO_ACCOUNT); | ||
5634 | BUG_ON(ret); | 5825 | BUG_ON(ret); |
5635 | btrfs_put_block_group(block_group); | 5826 | btrfs_put_block_group(block_group); |
5636 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 5827 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
@@ -5687,8 +5878,7 @@ use_block_rsv(struct btrfs_trans_handle *trans, | |||
5687 | block_rsv = get_block_rsv(trans, root); | 5878 | block_rsv = get_block_rsv(trans, root); |
5688 | 5879 | ||
5689 | if (block_rsv->size == 0) { | 5880 | if (block_rsv->size == 0) { |
5690 | ret = reserve_metadata_bytes(trans, root, block_rsv, | 5881 | ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0); |
5691 | blocksize, 0); | ||
5692 | /* | 5882 | /* |
5693 | * If we couldn't reserve metadata bytes try and use some from | 5883 | * If we couldn't reserve metadata bytes try and use some from |
5694 | * the global reserve. | 5884 | * the global reserve. |
@@ -5708,13 +5898,15 @@ use_block_rsv(struct btrfs_trans_handle *trans, | |||
5708 | if (!ret) | 5898 | if (!ret) |
5709 | return block_rsv; | 5899 | return block_rsv; |
5710 | if (ret) { | 5900 | if (ret) { |
5711 | WARN_ON(1); | 5901 | static DEFINE_RATELIMIT_STATE(_rs, |
5712 | ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize, | 5902 | DEFAULT_RATELIMIT_INTERVAL, |
5713 | 0); | 5903 | /*DEFAULT_RATELIMIT_BURST*/ 2); |
5904 | if (__ratelimit(&_rs)) { | ||
5905 | printk(KERN_DEBUG "btrfs: block rsv returned %d\n", ret); | ||
5906 | WARN_ON(1); | ||
5907 | } | ||
5908 | ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0); | ||
5714 | if (!ret) { | 5909 | if (!ret) { |
5715 | spin_lock(&block_rsv->lock); | ||
5716 | block_rsv->size += blocksize; | ||
5717 | spin_unlock(&block_rsv->lock); | ||
5718 | return block_rsv; | 5910 | return block_rsv; |
5719 | } else if (ret && block_rsv != global_rsv) { | 5911 | } else if (ret && block_rsv != global_rsv) { |
5720 | ret = block_rsv_use_bytes(global_rsv, blocksize); | 5912 | ret = block_rsv_use_bytes(global_rsv, blocksize); |
@@ -6592,12 +6784,9 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force) | |||
6592 | cache->bytes_super - btrfs_block_group_used(&cache->item); | 6784 | cache->bytes_super - btrfs_block_group_used(&cache->item); |
6593 | 6785 | ||
6594 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + | 6786 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + |
6595 | sinfo->bytes_may_use + sinfo->bytes_readonly + | 6787 | sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes + |
6596 | cache->reserved_pinned + num_bytes + min_allocable_bytes <= | 6788 | min_allocable_bytes <= sinfo->total_bytes) { |
6597 | sinfo->total_bytes) { | ||
6598 | sinfo->bytes_readonly += num_bytes; | 6789 | sinfo->bytes_readonly += num_bytes; |
6599 | sinfo->bytes_reserved += cache->reserved_pinned; | ||
6600 | cache->reserved_pinned = 0; | ||
6601 | cache->ro = 1; | 6790 | cache->ro = 1; |
6602 | ret = 0; | 6791 | ret = 0; |
6603 | } | 6792 | } |
@@ -6964,7 +7153,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
6964 | struct btrfs_space_info, | 7153 | struct btrfs_space_info, |
6965 | list); | 7154 | list); |
6966 | if (space_info->bytes_pinned > 0 || | 7155 | if (space_info->bytes_pinned > 0 || |
6967 | space_info->bytes_reserved > 0) { | 7156 | space_info->bytes_reserved > 0 || |
7157 | space_info->bytes_may_use > 0) { | ||
6968 | WARN_ON(1); | 7158 | WARN_ON(1); |
6969 | dump_space_info(space_info, 0, 0); | 7159 | dump_space_info(space_info, 0, 0); |
6970 | } | 7160 | } |
@@ -7006,14 +7196,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7006 | return -ENOMEM; | 7196 | return -ENOMEM; |
7007 | path->reada = 1; | 7197 | path->reada = 1; |
7008 | 7198 | ||
7009 | cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); | 7199 | cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy); |
7010 | if (cache_gen != 0 && | 7200 | if (btrfs_test_opt(root, SPACE_CACHE) && |
7011 | btrfs_super_generation(&root->fs_info->super_copy) != cache_gen) | 7201 | btrfs_super_generation(root->fs_info->super_copy) != cache_gen) |
7012 | need_clear = 1; | 7202 | need_clear = 1; |
7013 | if (btrfs_test_opt(root, CLEAR_CACHE)) | 7203 | if (btrfs_test_opt(root, CLEAR_CACHE)) |
7014 | need_clear = 1; | 7204 | need_clear = 1; |
7015 | if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen) | ||
7016 | printk(KERN_INFO "btrfs: disk space caching is enabled\n"); | ||
7017 | 7205 | ||
7018 | while (1) { | 7206 | while (1) { |
7019 | ret = find_first_block_group(root, path, &key); | 7207 | ret = find_first_block_group(root, path, &key); |
@@ -7252,7 +7440,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
7252 | goto out; | 7440 | goto out; |
7253 | } | 7441 | } |
7254 | 7442 | ||
7255 | inode = lookup_free_space_inode(root, block_group, path); | 7443 | inode = lookup_free_space_inode(tree_root, block_group, path); |
7256 | if (!IS_ERR(inode)) { | 7444 | if (!IS_ERR(inode)) { |
7257 | ret = btrfs_orphan_add(trans, inode); | 7445 | ret = btrfs_orphan_add(trans, inode); |
7258 | BUG_ON(ret); | 7446 | BUG_ON(ret); |
@@ -7268,7 +7456,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
7268 | spin_unlock(&block_group->lock); | 7456 | spin_unlock(&block_group->lock); |
7269 | } | 7457 | } |
7270 | /* One for our lookup ref */ | 7458 | /* One for our lookup ref */ |
7271 | iput(inode); | 7459 | btrfs_add_delayed_iput(inode); |
7272 | } | 7460 | } |
7273 | 7461 | ||
7274 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | 7462 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; |
@@ -7339,7 +7527,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info) | |||
7339 | int mixed = 0; | 7527 | int mixed = 0; |
7340 | int ret; | 7528 | int ret; |
7341 | 7529 | ||
7342 | disk_super = &fs_info->super_copy; | 7530 | disk_super = fs_info->super_copy; |
7343 | if (!btrfs_super_root(disk_super)) | 7531 | if (!btrfs_super_root(disk_super)) |
7344 | return 1; | 7532 | return 1; |
7345 | 7533 | ||
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 624ef10d36cc..1f87c4d0e7a0 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -895,6 +895,194 @@ search_again: | |||
895 | goto again; | 895 | goto again; |
896 | } | 896 | } |
897 | 897 | ||
898 | /** | ||
899 | * convert_extent_bit - convert all bits in a given range from one bit to another | ||
900 | * @tree: the io tree to search | ||
901 | * @start: the start offset in bytes | ||
902 | * @end: the end offset in bytes (inclusive) | ||
903 | * @bits: the bits to set in this range | ||
904 | * @clear_bits: the bits to clear in this range | ||
905 | * @mask: the allocation mask | ||
906 | * | ||
907 | * This will go through and set bits for the given range. If any states | ||
908 | * already exist in this range, they are set with the given bits and cleared | ||
909 | * of the clear_bits. This is only meant to be used by things that are | ||
910 | * mergeable, i.e. converting from, say, DELALLOC to DIRTY. This is not meant | ||
911 | * to be used with boundary bits like LOCK. | ||
912 | */ | ||
913 | int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | ||
914 | int bits, int clear_bits, gfp_t mask) | ||
915 | { | ||
916 | struct extent_state *state; | ||
917 | struct extent_state *prealloc = NULL; | ||
918 | struct rb_node *node; | ||
919 | int err = 0; | ||
920 | u64 last_start; | ||
921 | u64 last_end; | ||
922 | |||
923 | again: | ||
924 | if (!prealloc && (mask & __GFP_WAIT)) { | ||
925 | prealloc = alloc_extent_state(mask); | ||
926 | if (!prealloc) | ||
927 | return -ENOMEM; | ||
928 | } | ||
929 | |||
930 | spin_lock(&tree->lock); | ||
931 | /* | ||
932 | * this search will find all the extents that end after | ||
933 | * our range starts. | ||
934 | */ | ||
935 | node = tree_search(tree, start); | ||
936 | if (!node) { | ||
937 | prealloc = alloc_extent_state_atomic(prealloc); | ||
938 | if (!prealloc) | ||
939 | return -ENOMEM; | ||
940 | err = insert_state(tree, prealloc, start, end, &bits); | ||
941 | prealloc = NULL; | ||
942 | BUG_ON(err == -EEXIST); | ||
943 | goto out; | ||
944 | } | ||
945 | state = rb_entry(node, struct extent_state, rb_node); | ||
946 | hit_next: | ||
947 | last_start = state->start; | ||
948 | last_end = state->end; | ||
949 | |||
950 | /* | ||
951 | * | ---- desired range ---- | | ||
952 | * | state | | ||
953 | * | ||
954 | * Just lock what we found and keep going | ||
955 | */ | ||
956 | if (state->start == start && state->end <= end) { | ||
957 | struct rb_node *next_node; | ||
958 | |||
959 | set_state_bits(tree, state, &bits); | ||
960 | clear_state_bit(tree, state, &clear_bits, 0); | ||
961 | |||
962 | merge_state(tree, state); | ||
963 | if (last_end == (u64)-1) | ||
964 | goto out; | ||
965 | |||
966 | start = last_end + 1; | ||
967 | next_node = rb_next(&state->rb_node); | ||
968 | if (next_node && start < end && prealloc && !need_resched()) { | ||
969 | state = rb_entry(next_node, struct extent_state, | ||
970 | rb_node); | ||
971 | if (state->start == start) | ||
972 | goto hit_next; | ||
973 | } | ||
974 | goto search_again; | ||
975 | } | ||
976 | |||
977 | /* | ||
978 | * | ---- desired range ---- | | ||
979 | * | state | | ||
980 | * or | ||
981 | * | ------------- state -------------- | | ||
982 | * | ||
983 | * We need to split the extent we found, and may flip bits on | ||
984 | * second half. | ||
985 | * | ||
986 | * If the extent we found extends past our | ||
987 | * range, we just split and search again. It'll get split | ||
988 | * again the next time though. | ||
989 | * | ||
990 | * If the extent we found is inside our range, we set the | ||
991 | * desired bit on it. | ||
992 | */ | ||
993 | if (state->start < start) { | ||
994 | prealloc = alloc_extent_state_atomic(prealloc); | ||
995 | if (!prealloc) | ||
996 | return -ENOMEM; | ||
997 | err = split_state(tree, state, prealloc, start); | ||
998 | BUG_ON(err == -EEXIST); | ||
999 | prealloc = NULL; | ||
1000 | if (err) | ||
1001 | goto out; | ||
1002 | if (state->end <= end) { | ||
1003 | set_state_bits(tree, state, &bits); | ||
1004 | clear_state_bit(tree, state, &clear_bits, 0); | ||
1005 | merge_state(tree, state); | ||
1006 | if (last_end == (u64)-1) | ||
1007 | goto out; | ||
1008 | start = last_end + 1; | ||
1009 | } | ||
1010 | goto search_again; | ||
1011 | } | ||
1012 | /* | ||
1013 | * | ---- desired range ---- | | ||
1014 | * | state | or | state | | ||
1015 | * | ||
1016 | * There's a hole, we need to insert something in it and | ||
1017 | * ignore the extent we found. | ||
1018 | */ | ||
1019 | if (state->start > start) { | ||
1020 | u64 this_end; | ||
1021 | if (end < last_start) | ||
1022 | this_end = end; | ||
1023 | else | ||
1024 | this_end = last_start - 1; | ||
1025 | |||
1026 | prealloc = alloc_extent_state_atomic(prealloc); | ||
1027 | if (!prealloc) | ||
1028 | return -ENOMEM; | ||
1029 | |||
1030 | /* | ||
1031 | * Avoid to free 'prealloc' if it can be merged with | ||
1032 | * the later extent. | ||
1033 | */ | ||
1034 | err = insert_state(tree, prealloc, start, this_end, | ||
1035 | &bits); | ||
1036 | BUG_ON(err == -EEXIST); | ||
1037 | if (err) { | ||
1038 | free_extent_state(prealloc); | ||
1039 | prealloc = NULL; | ||
1040 | goto out; | ||
1041 | } | ||
1042 | prealloc = NULL; | ||
1043 | start = this_end + 1; | ||
1044 | goto search_again; | ||
1045 | } | ||
1046 | /* | ||
1047 | * | ---- desired range ---- | | ||
1048 | * | state | | ||
1049 | * We need to split the extent, and set the bit | ||
1050 | * on the first half | ||
1051 | */ | ||
1052 | if (state->start <= end && state->end > end) { | ||
1053 | prealloc = alloc_extent_state_atomic(prealloc); | ||
1054 | if (!prealloc) | ||
1055 | return -ENOMEM; | ||
1056 | |||
1057 | err = split_state(tree, state, prealloc, end + 1); | ||
1058 | BUG_ON(err == -EEXIST); | ||
1059 | |||
1060 | set_state_bits(tree, prealloc, &bits); | ||
1061 | clear_state_bit(tree, prealloc, &clear_bits, 0); | ||
1062 | |||
1063 | merge_state(tree, prealloc); | ||
1064 | prealloc = NULL; | ||
1065 | goto out; | ||
1066 | } | ||
1067 | |||
1068 | goto search_again; | ||
1069 | |||
1070 | out: | ||
1071 | spin_unlock(&tree->lock); | ||
1072 | if (prealloc) | ||
1073 | free_extent_state(prealloc); | ||
1074 | |||
1075 | return err; | ||
1076 | |||
1077 | search_again: | ||
1078 | if (start > end) | ||
1079 | goto out; | ||
1080 | spin_unlock(&tree->lock); | ||
1081 | if (mask & __GFP_WAIT) | ||
1082 | cond_resched(); | ||
1083 | goto again; | ||
1084 | } | ||
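A hedged usage sketch for convert_extent_bit(), matching the DELALLOC-to-DIRTY case from its comment; error handling is abbreviated, and only mergeable bits may be converted this way. (Note that the early -ENOMEM returns in this version of the function appear to bail out with tree->lock still held; later revisions route those through the out label.)

	err = convert_extent_bit(tree, start, end,
				 EXTENT_DIRTY,		/* bits to set */
				 EXTENT_DELALLOC,	/* bits to clear */
				 GFP_NOFS);
	if (err == -ENOMEM)
		/* state preallocation failed; the caller decides how to retry */
		return err;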
1085 | |||
898 | /* wrappers around set/clear extent bit */ | 1086 | /* wrappers around set/clear extent bit */ |
899 | int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | 1087 | int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, |
900 | gfp_t mask) | 1088 | gfp_t mask) |
@@ -920,7 +1108,7 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, | |||
920 | struct extent_state **cached_state, gfp_t mask) | 1108 | struct extent_state **cached_state, gfp_t mask) |
921 | { | 1109 | { |
922 | return set_extent_bit(tree, start, end, | 1110 | return set_extent_bit(tree, start, end, |
923 | EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE, | 1111 | EXTENT_DELALLOC | EXTENT_UPTODATE, |
924 | 0, NULL, cached_state, mask); | 1112 | 0, NULL, cached_state, mask); |
925 | } | 1113 | } |
926 | 1114 | ||
@@ -2102,7 +2290,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
2102 | if (tree->ops && tree->ops->readpage_io_failed_hook) | 2290 | if (tree->ops && tree->ops->readpage_io_failed_hook) |
2103 | ret = tree->ops->readpage_io_failed_hook( | 2291 | ret = tree->ops->readpage_io_failed_hook( |
2104 | bio, page, start, end, | 2292 | bio, page, start, end, |
2105 | failed_mirror, NULL); | 2293 | failed_mirror, state); |
2106 | else | 2294 | else |
2107 | ret = bio_readpage_error(bio, page, start, end, | 2295 | ret = bio_readpage_error(bio, page, start, end, |
2108 | failed_mirror, NULL); | 2296 | failed_mirror, NULL); |
@@ -2511,6 +2699,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2511 | int compressed; | 2699 | int compressed; |
2512 | int write_flags; | 2700 | int write_flags; |
2513 | unsigned long nr_written = 0; | 2701 | unsigned long nr_written = 0; |
2702 | bool fill_delalloc = true; | ||
2514 | 2703 | ||
2515 | if (wbc->sync_mode == WB_SYNC_ALL) | 2704 | if (wbc->sync_mode == WB_SYNC_ALL) |
2516 | write_flags = WRITE_SYNC; | 2705 | write_flags = WRITE_SYNC; |
@@ -2520,6 +2709,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2520 | trace___extent_writepage(page, inode, wbc); | 2709 | trace___extent_writepage(page, inode, wbc); |
2521 | 2710 | ||
2522 | WARN_ON(!PageLocked(page)); | 2711 | WARN_ON(!PageLocked(page)); |
2712 | |||
2713 | ClearPageError(page); | ||
2714 | |||
2523 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); | 2715 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); |
2524 | if (page->index > end_index || | 2716 | if (page->index > end_index || |
2525 | (page->index == end_index && !pg_offset)) { | 2717 | (page->index == end_index && !pg_offset)) { |
@@ -2541,10 +2733,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2541 | 2733 | ||
2542 | set_page_extent_mapped(page); | 2734 | set_page_extent_mapped(page); |
2543 | 2735 | ||
2736 | if (!tree->ops || !tree->ops->fill_delalloc) | ||
2737 | fill_delalloc = false; | ||
2738 | |||
2544 | delalloc_start = start; | 2739 | delalloc_start = start; |
2545 | delalloc_end = 0; | 2740 | delalloc_end = 0; |
2546 | page_started = 0; | 2741 | page_started = 0; |
2547 | if (!epd->extent_locked) { | 2742 | if (!epd->extent_locked && fill_delalloc) { |
2548 | u64 delalloc_to_write = 0; | 2743 | u64 delalloc_to_write = 0; |
2549 | /* | 2744 | /* |
2550 | * make sure the wbc mapping index is at least updated | 2745 | * make sure the wbc mapping index is at least updated |
@@ -2796,10 +2991,16 @@ retry: | |||
2796 | * swizzled back from swapper_space to tmpfs file | 2991 | * swizzled back from swapper_space to tmpfs file |
2797 | * mapping | 2992 | * mapping |
2798 | */ | 2993 | */ |
2799 | if (tree->ops && tree->ops->write_cache_pages_lock_hook) | 2994 | if (tree->ops && |
2800 | tree->ops->write_cache_pages_lock_hook(page); | 2995 | tree->ops->write_cache_pages_lock_hook) { |
2801 | else | 2996 | tree->ops->write_cache_pages_lock_hook(page, |
2802 | lock_page(page); | 2997 | data, flush_fn); |
2998 | } else { | ||
2999 | if (!trylock_page(page)) { | ||
3000 | flush_fn(data); | ||
3001 | lock_page(page); | ||
3002 | } | ||
3003 | } | ||
2803 | 3004 | ||
2804 | if (unlikely(page->mapping != mapping)) { | 3005 | if (unlikely(page->mapping != mapping)) { |
2805 | unlock_page(page); | 3006 | unlock_page(page); |
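The hook now receives the flush callback, so an implementation can mirror the default trylock-flush-lock dance in the else branch above. A minimal sketch matching the new signature:

	static int example_lock_hook(struct page *page, void *data,
				     void (*flush_fn)(void *))
	{
		if (!trylock_page(page)) {
			/* push out queued bios before sleeping on the page lock */
			flush_fn(data);
			lock_page(page);
		}
		return 0;
	}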
@@ -3579,6 +3780,7 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, | |||
3579 | PAGECACHE_TAG_DIRTY); | 3780 | PAGECACHE_TAG_DIRTY); |
3580 | } | 3781 | } |
3581 | spin_unlock_irq(&page->mapping->tree_lock); | 3782 | spin_unlock_irq(&page->mapping->tree_lock); |
3783 | ClearPageError(page); | ||
3582 | unlock_page(page); | 3784 | unlock_page(page); |
3583 | } | 3785 | } |
3584 | return 0; | 3786 | return 0; |
@@ -3724,8 +3926,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, | |||
3724 | } | 3926 | } |
3725 | 3927 | ||
3726 | int read_extent_buffer_pages(struct extent_io_tree *tree, | 3928 | int read_extent_buffer_pages(struct extent_io_tree *tree, |
3727 | struct extent_buffer *eb, | 3929 | struct extent_buffer *eb, u64 start, int wait, |
3728 | u64 start, int wait, | ||
3729 | get_extent_t *get_extent, int mirror_num) | 3930 | get_extent_t *get_extent, int mirror_num) |
3730 | { | 3931 | { |
3731 | unsigned long i; | 3932 | unsigned long i; |
@@ -3761,7 +3962,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
3761 | num_pages = num_extent_pages(eb->start, eb->len); | 3962 | num_pages = num_extent_pages(eb->start, eb->len); |
3762 | for (i = start_i; i < num_pages; i++) { | 3963 | for (i = start_i; i < num_pages; i++) { |
3763 | page = extent_buffer_page(eb, i); | 3964 | page = extent_buffer_page(eb, i); |
3764 | if (!wait) { | 3965 | if (wait == WAIT_NONE) { |
3765 | if (!trylock_page(page)) | 3966 | if (!trylock_page(page)) |
3766 | goto unlock_exit; | 3967 | goto unlock_exit; |
3767 | } else { | 3968 | } else { |
@@ -3805,7 +4006,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
3805 | if (bio) | 4006 | if (bio) |
3806 | submit_one_bio(READ, bio, mirror_num, bio_flags); | 4007 | submit_one_bio(READ, bio, mirror_num, bio_flags); |
3807 | 4008 | ||
3808 | if (ret || !wait) | 4009 | if (ret || wait != WAIT_COMPLETE) |
3809 | return ret; | 4010 | return ret; |
3810 | 4011 | ||
3811 | for (i = start_i; i < num_pages; i++) { | 4012 | for (i = start_i; i < num_pages; i++) { |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index a8e20b672922..feb9be0e23bc 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -17,7 +17,8 @@ | |||
17 | #define EXTENT_NODATASUM (1 << 10) | 17 | #define EXTENT_NODATASUM (1 << 10) |
18 | #define EXTENT_DO_ACCOUNTING (1 << 11) | 18 | #define EXTENT_DO_ACCOUNTING (1 << 11) |
19 | #define EXTENT_FIRST_DELALLOC (1 << 12) | 19 | #define EXTENT_FIRST_DELALLOC (1 << 12) |
20 | #define EXTENT_DAMAGED (1 << 13) | 20 | #define EXTENT_NEED_WAIT (1 << 13) |
21 | #define EXTENT_DAMAGED (1 << 14) | ||
21 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | 22 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) |
22 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) | 23 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) |
23 | 24 | ||
@@ -33,6 +34,7 @@ | |||
33 | #define EXTENT_BUFFER_BLOCKING 1 | 34 | #define EXTENT_BUFFER_BLOCKING 1 |
34 | #define EXTENT_BUFFER_DIRTY 2 | 35 | #define EXTENT_BUFFER_DIRTY 2 |
35 | #define EXTENT_BUFFER_CORRUPT 3 | 36 | #define EXTENT_BUFFER_CORRUPT 3 |
37 | #define EXTENT_BUFFER_READAHEAD 4 /* this got triggered by readahead */ | ||
36 | 38 | ||
37 | /* these are flags for extent_clear_unlock_delalloc */ | 39 | /* these are flags for extent_clear_unlock_delalloc */ |
38 | #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 | 40 | #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 |
@@ -86,7 +88,8 @@ struct extent_io_ops { | |||
86 | struct extent_state *other); | 88 | struct extent_state *other); |
87 | void (*split_extent_hook)(struct inode *inode, | 89 | void (*split_extent_hook)(struct inode *inode, |
88 | struct extent_state *orig, u64 split); | 90 | struct extent_state *orig, u64 split); |
89 | int (*write_cache_pages_lock_hook)(struct page *page); | 91 | int (*write_cache_pages_lock_hook)(struct page *page, void *data, |
92 | void (*flush_fn)(void *)); | ||
90 | }; | 93 | }; |
91 | 94 | ||
92 | struct extent_io_tree { | 95 | struct extent_io_tree { |
@@ -215,6 +218,8 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | |||
215 | gfp_t mask); | 218 | gfp_t mask); |
216 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | 219 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, |
217 | gfp_t mask); | 220 | gfp_t mask); |
221 | int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | ||
222 | int bits, int clear_bits, gfp_t mask); | ||
218 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, | 223 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, |
219 | struct extent_state **cached_state, gfp_t mask); | 224 | struct extent_state **cached_state, gfp_t mask); |
220 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | 225 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, |
@@ -249,6 +254,9 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | |||
249 | struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, | 254 | struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, |
250 | u64 start, unsigned long len); | 255 | u64 start, unsigned long len); |
251 | void free_extent_buffer(struct extent_buffer *eb); | 256 | void free_extent_buffer(struct extent_buffer *eb); |
257 | #define WAIT_NONE 0 | ||
258 | #define WAIT_COMPLETE 1 | ||
259 | #define WAIT_PAGE_LOCK 2 | ||
252 | int read_extent_buffer_pages(struct extent_io_tree *tree, | 260 | int read_extent_buffer_pages(struct extent_io_tree *tree, |
253 | struct extent_buffer *eb, u64 start, int wait, | 261 | struct extent_buffer *eb, u64 start, int wait, |
254 | get_extent_t *get_extent, int mirror_num); | 262 | get_extent_t *get_extent, int mirror_num); |
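read_extent_buffer_pages() now takes a tri-state wait mode instead of a boolean: WAIT_NONE trylocks pages and backs off, WAIT_COMPLETE blocks until the read finishes, and WAIT_PAGE_LOCK locks the pages without waiting for IO completion (apparently for the new readahead code in this series). A hedged call sketch, assuming the metadata get_extent callback and a mirror_num from the caller:

	ret = read_extent_buffer_pages(tree, eb, 0, WAIT_COMPLETE,
				       btree_get_extent, mirror_num);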
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index a1cb7821becd..c7fb3a4247d3 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -91,8 +91,7 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, | |||
91 | struct btrfs_csum_item *item; | 91 | struct btrfs_csum_item *item; |
92 | struct extent_buffer *leaf; | 92 | struct extent_buffer *leaf; |
93 | u64 csum_offset = 0; | 93 | u64 csum_offset = 0; |
94 | u16 csum_size = | 94 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); |
95 | btrfs_super_csum_size(&root->fs_info->super_copy); | ||
96 | int csums_in_item; | 95 | int csums_in_item; |
97 | 96 | ||
98 | file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; | 97 | file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; |
@@ -162,8 +161,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, | |||
162 | u64 item_last_offset = 0; | 161 | u64 item_last_offset = 0; |
163 | u64 disk_bytenr; | 162 | u64 disk_bytenr; |
164 | u32 diff; | 163 | u32 diff; |
165 | u16 csum_size = | 164 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); |
166 | btrfs_super_csum_size(&root->fs_info->super_copy); | ||
167 | int ret; | 165 | int ret; |
168 | struct btrfs_path *path; | 166 | struct btrfs_path *path; |
169 | struct btrfs_csum_item *item = NULL; | 167 | struct btrfs_csum_item *item = NULL; |
@@ -290,7 +288,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | |||
290 | int ret; | 288 | int ret; |
291 | size_t size; | 289 | size_t size; |
292 | u64 csum_end; | 290 | u64 csum_end; |
293 | u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); | 291 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); |
294 | 292 | ||
295 | path = btrfs_alloc_path(); | 293 | path = btrfs_alloc_path(); |
296 | if (!path) | 294 | if (!path) |
@@ -492,8 +490,7 @@ static noinline int truncate_one_csum(struct btrfs_trans_handle *trans, | |||
492 | u64 bytenr, u64 len) | 490 | u64 bytenr, u64 len) |
493 | { | 491 | { |
494 | struct extent_buffer *leaf; | 492 | struct extent_buffer *leaf; |
495 | u16 csum_size = | 493 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); |
496 | btrfs_super_csum_size(&root->fs_info->super_copy); | ||
497 | u64 csum_end; | 494 | u64 csum_end; |
498 | u64 end_byte = bytenr + len; | 495 | u64 end_byte = bytenr + len; |
499 | u32 blocksize_bits = root->fs_info->sb->s_blocksize_bits; | 496 | u32 blocksize_bits = root->fs_info->sb->s_blocksize_bits; |
@@ -549,8 +546,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, | |||
549 | u64 csum_end; | 546 | u64 csum_end; |
550 | struct extent_buffer *leaf; | 547 | struct extent_buffer *leaf; |
551 | int ret; | 548 | int ret; |
552 | u16 csum_size = | 549 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); |
553 | btrfs_super_csum_size(&root->fs_info->super_copy); | ||
554 | int blocksize_bits = root->fs_info->sb->s_blocksize_bits; | 550 | int blocksize_bits = root->fs_info->sb->s_blocksize_bits; |
555 | 551 | ||
556 | root = root->fs_info->csum_root; | 552 | root = root->fs_info->csum_root; |
@@ -676,8 +672,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, | |||
676 | struct btrfs_sector_sum *sector_sum; | 672 | struct btrfs_sector_sum *sector_sum; |
677 | u32 nritems; | 673 | u32 nritems; |
678 | u32 ins_size; | 674 | u32 ins_size; |
679 | u16 csum_size = | 675 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); |
680 | btrfs_super_csum_size(&root->fs_info->super_copy); | ||
681 | 676 | ||
682 | path = btrfs_alloc_path(); | 677 | path = btrfs_alloc_path(); |
683 | if (!path) | 678 | if (!path) |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a381cd22f518..f2e928289600 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -1036,11 +1036,13 @@ out: | |||
1036 | * on error we return an unlocked page and the error value | 1036 | * on error we return an unlocked page and the error value |
1037 | * on success we return a locked page and 0 | 1037 | * on success we return a locked page and 0 |
1038 | */ | 1038 | */ |
1039 | static int prepare_uptodate_page(struct page *page, u64 pos) | 1039 | static int prepare_uptodate_page(struct page *page, u64 pos, |
1040 | bool force_uptodate) | ||
1040 | { | 1041 | { |
1041 | int ret = 0; | 1042 | int ret = 0; |
1042 | 1043 | ||
1043 | if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) { | 1044 | if (((pos & (PAGE_CACHE_SIZE - 1)) || force_uptodate) && |
1045 | !PageUptodate(page)) { | ||
1044 | ret = btrfs_readpage(NULL, page); | 1046 | ret = btrfs_readpage(NULL, page); |
1045 | if (ret) | 1047 | if (ret) |
1046 | return ret; | 1048 | return ret; |
@@ -1061,12 +1063,13 @@ static int prepare_uptodate_page(struct page *page, u64 pos) | |||
1061 | static noinline int prepare_pages(struct btrfs_root *root, struct file *file, | 1063 | static noinline int prepare_pages(struct btrfs_root *root, struct file *file, |
1062 | struct page **pages, size_t num_pages, | 1064 | struct page **pages, size_t num_pages, |
1063 | loff_t pos, unsigned long first_index, | 1065 | loff_t pos, unsigned long first_index, |
1064 | size_t write_bytes) | 1066 | size_t write_bytes, bool force_uptodate) |
1065 | { | 1067 | { |
1066 | struct extent_state *cached_state = NULL; | 1068 | struct extent_state *cached_state = NULL; |
1067 | int i; | 1069 | int i; |
1068 | unsigned long index = pos >> PAGE_CACHE_SHIFT; | 1070 | unsigned long index = pos >> PAGE_CACHE_SHIFT; |
1069 | struct inode *inode = fdentry(file)->d_inode; | 1071 | struct inode *inode = fdentry(file)->d_inode; |
1072 | gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); | ||
1070 | int err = 0; | 1073 | int err = 0; |
1071 | int faili = 0; | 1074 | int faili = 0; |
1072 | u64 start_pos; | 1075 | u64 start_pos; |
@@ -1078,7 +1081,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, | |||
1078 | again: | 1081 | again: |
1079 | for (i = 0; i < num_pages; i++) { | 1082 | for (i = 0; i < num_pages; i++) { |
1080 | pages[i] = find_or_create_page(inode->i_mapping, index + i, | 1083 | pages[i] = find_or_create_page(inode->i_mapping, index + i, |
1081 | GFP_NOFS); | 1084 | mask); |
1082 | if (!pages[i]) { | 1085 | if (!pages[i]) { |
1083 | faili = i - 1; | 1086 | faili = i - 1; |
1084 | err = -ENOMEM; | 1087 | err = -ENOMEM; |
@@ -1086,10 +1089,11 @@ again: | |||
1086 | } | 1089 | } |
1087 | 1090 | ||
1088 | if (i == 0) | 1091 | if (i == 0) |
1089 | err = prepare_uptodate_page(pages[i], pos); | 1092 | err = prepare_uptodate_page(pages[i], pos, |
1093 | force_uptodate); | ||
1090 | if (i == num_pages - 1) | 1094 | if (i == num_pages - 1) |
1091 | err = prepare_uptodate_page(pages[i], | 1095 | err = prepare_uptodate_page(pages[i], |
1092 | pos + write_bytes); | 1096 | pos + write_bytes, false); |
1093 | if (err) { | 1097 | if (err) { |
1094 | page_cache_release(pages[i]); | 1098 | page_cache_release(pages[i]); |
1095 | faili = i - 1; | 1099 | faili = i - 1; |
@@ -1158,6 +1162,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1158 | size_t num_written = 0; | 1162 | size_t num_written = 0; |
1159 | int nrptrs; | 1163 | int nrptrs; |
1160 | int ret = 0; | 1164 | int ret = 0; |
1165 | bool force_page_uptodate = false; | ||
1161 | 1166 | ||
1162 | nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / | 1167 | nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / |
1163 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / | 1168 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / |
@@ -1200,7 +1205,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1200 | * contents of pages from loop to loop | 1205 | * contents of pages from loop to loop |
1201 | */ | 1206 | */ |
1202 | ret = prepare_pages(root, file, pages, num_pages, | 1207 | ret = prepare_pages(root, file, pages, num_pages, |
1203 | pos, first_index, write_bytes); | 1208 | pos, first_index, write_bytes, |
1209 | force_page_uptodate); | ||
1204 | if (ret) { | 1210 | if (ret) { |
1205 | btrfs_delalloc_release_space(inode, | 1211 | btrfs_delalloc_release_space(inode, |
1206 | num_pages << PAGE_CACHE_SHIFT); | 1212 | num_pages << PAGE_CACHE_SHIFT); |
@@ -1217,12 +1223,15 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1217 | if (copied < write_bytes) | 1223 | if (copied < write_bytes) |
1218 | nrptrs = 1; | 1224 | nrptrs = 1; |
1219 | 1225 | ||
1220 | if (copied == 0) | 1226 | if (copied == 0) { |
1227 | force_page_uptodate = true; | ||
1221 | dirty_pages = 0; | 1228 | dirty_pages = 0; |
1222 | else | 1229 | } else { |
1230 | force_page_uptodate = false; | ||
1223 | dirty_pages = (copied + offset + | 1231 | dirty_pages = (copied + offset + |
1224 | PAGE_CACHE_SIZE - 1) >> | 1232 | PAGE_CACHE_SIZE - 1) >> |
1225 | PAGE_CACHE_SHIFT; | 1233 | PAGE_CACHE_SHIFT; |
1234 | } | ||
1226 | 1235 | ||
1227 | /* | 1236 | /* |
1228 | * If we had a short copy we need to release the excess delalloc | 1237 | * If we had a short copy we need to release the excess delalloc |
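
The copied == 0 branch above is what drives the new force_uptodate flag: a copy that made no progress is retried on the same page, and on the retry the page must first be read fully uptodate, otherwise a later partial copy could leave stale bytes in the uncopied tail of the page. A condensed sketch of that retry shape, with copy_into_page() as a made-up stand-in for the iov_iter copy:

    /* Sketch only; copy_into_page() is hypothetical. */
    static int write_one_page_retrying(struct page *page, loff_t pos, size_t len)
    {
            bool force_uptodate = false;
            size_t copied;
            int err;

            do {
                    err = prepare_uptodate_page(page, pos, force_uptodate);
                    if (err)
                            return err;
                    copied = copy_into_page(page, pos, len);
                    /* zero progress: read the whole page before retrying */
                    force_uptodate = (copied == 0);
            } while (copied == 0);
            return 0;
    }
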
@@ -1607,10 +1616,6 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
1607 | goto out; | 1616 | goto out; |
1608 | } | 1617 | } |
1609 | 1618 | ||
1610 | ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); | ||
1611 | if (ret) | ||
1612 | goto out; | ||
1613 | |||
1614 | locked_end = alloc_end - 1; | 1619 | locked_end = alloc_end - 1; |
1615 | while (1) { | 1620 | while (1) { |
1616 | struct btrfs_ordered_extent *ordered; | 1621 | struct btrfs_ordered_extent *ordered; |
@@ -1656,11 +1661,27 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
1656 | if (em->block_start == EXTENT_MAP_HOLE || | 1661 | if (em->block_start == EXTENT_MAP_HOLE || |
1657 | (cur_offset >= inode->i_size && | 1662 | (cur_offset >= inode->i_size && |
1658 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | 1663 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { |
1664 | |||
1665 | /* | ||
1666 | * Make sure we have enough space before we do the | ||
1667 | * allocation. | ||
1668 | */ | ||
1669 | ret = btrfs_check_data_free_space(inode, last_byte - | ||
1670 | cur_offset); | ||
1671 | if (ret) { | ||
1672 | free_extent_map(em); | ||
1673 | break; | ||
1674 | } | ||
1675 | |||
1659 | ret = btrfs_prealloc_file_range(inode, mode, cur_offset, | 1676 | ret = btrfs_prealloc_file_range(inode, mode, cur_offset, |
1660 | last_byte - cur_offset, | 1677 | last_byte - cur_offset, |
1661 | 1 << inode->i_blkbits, | 1678 | 1 << inode->i_blkbits, |
1662 | offset + len, | 1679 | offset + len, |
1663 | &alloc_hint); | 1680 | &alloc_hint); |
1681 | |||
1682 | /* Let go of our reservation. */ | ||
1683 | btrfs_free_reserved_data_space(inode, last_byte - | ||
1684 | cur_offset); | ||
1664 | if (ret < 0) { | 1685 | if (ret < 0) { |
1665 | free_extent_map(em); | 1686 | free_extent_map(em); |
1666 | break; | 1687 | break; |
@@ -1686,8 +1707,6 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
1686 | } | 1707 | } |
1687 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, | 1708 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, |
1688 | &cached_state, GFP_NOFS); | 1709 | &cached_state, GFP_NOFS); |
1689 | |||
1690 | btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); | ||
1691 | out: | 1710 | out: |
1692 | mutex_unlock(&inode->i_mutex); | 1711 | mutex_unlock(&inode->i_mutex); |
1693 | return ret; | 1712 | return ret; |
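
Net effect of the fallocate hunks: the data-space reservation no longer brackets the whole [alloc_start, alloc_end) range for the duration of the call; it is taken just before each hole or past-EOF extent is preallocated and released right after, so sparse ranges stop pinning space they never allocate. The per-extent pattern, condensed from the hunk above:

    ret = btrfs_check_data_free_space(inode, last_byte - cur_offset);
    if (ret)
            break;          /* ENOSPC costs only this extent */
    ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
                                    last_byte - cur_offset, ...);
    /* Let go of our reservation. */
    btrfs_free_reserved_data_space(inode, last_byte - cur_offset);
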
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 41ac927401d0..7a15fcfb3e1f 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
21 | #include <linux/slab.h> | 21 | #include <linux/slab.h> |
22 | #include <linux/math64.h> | 22 | #include <linux/math64.h> |
23 | #include <linux/ratelimit.h> | ||
23 | #include "ctree.h" | 24 | #include "ctree.h" |
24 | #include "free-space-cache.h" | 25 | #include "free-space-cache.h" |
25 | #include "transaction.h" | 26 | #include "transaction.h" |
@@ -84,6 +85,7 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, | |||
84 | *block_group, struct btrfs_path *path) | 85 | *block_group, struct btrfs_path *path) |
85 | { | 86 | { |
86 | struct inode *inode = NULL; | 87 | struct inode *inode = NULL; |
88 | u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; | ||
87 | 89 | ||
88 | spin_lock(&block_group->lock); | 90 | spin_lock(&block_group->lock); |
89 | if (block_group->inode) | 91 | if (block_group->inode) |
@@ -98,13 +100,14 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, | |||
98 | return inode; | 100 | return inode; |
99 | 101 | ||
100 | spin_lock(&block_group->lock); | 102 | spin_lock(&block_group->lock); |
101 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) { | 103 | if (!((BTRFS_I(inode)->flags & flags) == flags)) { |
102 | printk(KERN_INFO "Old style space inode found, converting.\n"); | 104 | printk(KERN_INFO "Old style space inode found, converting.\n"); |
103 | BTRFS_I(inode)->flags &= ~BTRFS_INODE_NODATASUM; | 105 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM | |
106 | BTRFS_INODE_NODATACOW; | ||
104 | block_group->disk_cache_state = BTRFS_DC_CLEAR; | 107 | block_group->disk_cache_state = BTRFS_DC_CLEAR; |
105 | } | 108 | } |
106 | 109 | ||
107 | if (!btrfs_fs_closing(root->fs_info)) { | 110 | if (!block_group->iref) { |
108 | block_group->inode = igrab(inode); | 111 | block_group->inode = igrab(inode); |
109 | block_group->iref = 1; | 112 | block_group->iref = 1; |
110 | } | 113 | } |
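
The rewritten test catches an inode missing either flag; with the negation pushed inside, the same condition reads more naturally:

    /* equivalent to !((BTRFS_I(inode)->flags & flags) == flags) above */
    if ((BTRFS_I(inode)->flags & flags) != flags) {
            /* old style inode: set NODATASUM | NODATACOW, invalidate cache */
    }
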
@@ -122,12 +125,17 @@ int __create_free_space_inode(struct btrfs_root *root, | |||
122 | struct btrfs_free_space_header *header; | 125 | struct btrfs_free_space_header *header; |
123 | struct btrfs_inode_item *inode_item; | 126 | struct btrfs_inode_item *inode_item; |
124 | struct extent_buffer *leaf; | 127 | struct extent_buffer *leaf; |
128 | u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC; | ||
125 | int ret; | 129 | int ret; |
126 | 130 | ||
127 | ret = btrfs_insert_empty_inode(trans, root, path, ino); | 131 | ret = btrfs_insert_empty_inode(trans, root, path, ino); |
128 | if (ret) | 132 | if (ret) |
129 | return ret; | 133 | return ret; |
130 | 134 | ||
135 | /* We inline crcs for the free disk space cache */ ||
136 | if (ino != BTRFS_FREE_INO_OBJECTID) | ||
137 | flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; | ||
138 | |||
131 | leaf = path->nodes[0]; | 139 | leaf = path->nodes[0]; |
132 | inode_item = btrfs_item_ptr(leaf, path->slots[0], | 140 | inode_item = btrfs_item_ptr(leaf, path->slots[0], |
133 | struct btrfs_inode_item); | 141 | struct btrfs_inode_item); |
@@ -140,8 +148,7 @@ int __create_free_space_inode(struct btrfs_root *root, | |||
140 | btrfs_set_inode_uid(leaf, inode_item, 0); | 148 | btrfs_set_inode_uid(leaf, inode_item, 0); |
141 | btrfs_set_inode_gid(leaf, inode_item, 0); | 149 | btrfs_set_inode_gid(leaf, inode_item, 0); |
142 | btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); | 150 | btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); |
143 | btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS | | 151 | btrfs_set_inode_flags(leaf, inode_item, flags); |
144 | BTRFS_INODE_PREALLOC); | ||
145 | btrfs_set_inode_nlink(leaf, inode_item, 1); | 152 | btrfs_set_inode_nlink(leaf, inode_item, 1); |
146 | btrfs_set_inode_transid(leaf, inode_item, trans->transid); | 153 | btrfs_set_inode_transid(leaf, inode_item, trans->transid); |
147 | btrfs_set_inode_block_group(leaf, inode_item, offset); | 154 | btrfs_set_inode_block_group(leaf, inode_item, offset); |
@@ -191,16 +198,24 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, | |||
191 | struct inode *inode) | 198 | struct inode *inode) |
192 | { | 199 | { |
193 | struct btrfs_block_rsv *rsv; | 200 | struct btrfs_block_rsv *rsv; |
201 | u64 needed_bytes; | ||
194 | loff_t oldsize; | 202 | loff_t oldsize; |
195 | int ret = 0; | 203 | int ret = 0; |
196 | 204 | ||
197 | rsv = trans->block_rsv; | 205 | rsv = trans->block_rsv; |
198 | trans->block_rsv = root->orphan_block_rsv; | 206 | trans->block_rsv = &root->fs_info->global_block_rsv; |
199 | ret = btrfs_block_rsv_check(trans, root, | 207 | |
200 | root->orphan_block_rsv, | 208 | /* 1 for slack space, 1 for updating the inode */ |
201 | 0, 5); | 209 | needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) + |
202 | if (ret) | 210 | btrfs_calc_trans_metadata_size(root, 1); |
203 | return ret; | 211 | |
212 | spin_lock(&trans->block_rsv->lock); | ||
213 | if (trans->block_rsv->reserved < needed_bytes) { | ||
214 | spin_unlock(&trans->block_rsv->lock); | ||
215 | trans->block_rsv = rsv; | ||
216 | return -ENOSPC; | ||
217 | } | ||
218 | spin_unlock(&trans->block_rsv->lock); | ||
204 | 219 | ||
205 | oldsize = i_size_read(inode); | 220 | oldsize = i_size_read(inode); |
206 | btrfs_i_size_write(inode, 0); | 221 | btrfs_i_size_write(inode, 0); |
@@ -213,13 +228,15 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, | |||
213 | ret = btrfs_truncate_inode_items(trans, root, inode, | 228 | ret = btrfs_truncate_inode_items(trans, root, inode, |
214 | 0, BTRFS_EXTENT_DATA_KEY); | 229 | 0, BTRFS_EXTENT_DATA_KEY); |
215 | 230 | ||
216 | trans->block_rsv = rsv; | ||
217 | if (ret) { | 231 | if (ret) { |
232 | trans->block_rsv = rsv; | ||
218 | WARN_ON(1); | 233 | WARN_ON(1); |
219 | return ret; | 234 | return ret; |
220 | } | 235 | } |
221 | 236 | ||
222 | ret = btrfs_update_inode(trans, root, inode); | 237 | ret = btrfs_update_inode(trans, root, inode); |
238 | trans->block_rsv = rsv; | ||
239 | |||
223 | return ret; | 240 | return ret; |
224 | } | 241 | } |
225 | 242 | ||
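
btrfs_truncate_free_space_cache() now borrows from the global block reserve instead of the orphan reserve and fails fast when the reserve cannot cover one truncate item plus one inode update; note that trans->block_rsv is restored on every exit path. The fail-fast check, reduced to its essentials:

    /* Peek at the reserve under its lock and refuse to start a
     * truncate we could not finish. */
    spin_lock(&rsv->lock);
    if (rsv->reserved < needed_bytes) {
            spin_unlock(&rsv->lock);
            return -ENOSPC;
    }
    spin_unlock(&rsv->lock);
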
@@ -242,26 +259,342 @@ static int readahead_cache(struct inode *inode) | |||
242 | return 0; | 259 | return 0; |
243 | } | 260 | } |
244 | 261 | ||
262 | struct io_ctl { | ||
263 | void *cur, *orig; | ||
264 | struct page *page; | ||
265 | struct page **pages; | ||
266 | struct btrfs_root *root; | ||
267 | unsigned long size; | ||
268 | int index; | ||
269 | int num_pages; | ||
270 | unsigned check_crcs:1; | ||
271 | }; | ||
272 | |||
273 | static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode, | ||
274 | struct btrfs_root *root) | ||
275 | { | ||
276 | memset(io_ctl, 0, sizeof(struct io_ctl)); | ||
277 | io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> | ||
278 | PAGE_CACHE_SHIFT; | ||
279 | io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages, | ||
280 | GFP_NOFS); | ||
281 | if (!io_ctl->pages) | ||
282 | return -ENOMEM; | ||
283 | io_ctl->root = root; | ||
284 | if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID) | ||
285 | io_ctl->check_crcs = 1; | ||
286 | return 0; | ||
287 | } | ||
288 | |||
289 | static void io_ctl_free(struct io_ctl *io_ctl) | ||
290 | { | ||
291 | kfree(io_ctl->pages); | ||
292 | } | ||
293 | |||
294 | static void io_ctl_unmap_page(struct io_ctl *io_ctl) | ||
295 | { | ||
296 | if (io_ctl->cur) { | ||
297 | kunmap(io_ctl->page); | ||
298 | io_ctl->cur = NULL; | ||
299 | io_ctl->orig = NULL; | ||
300 | } | ||
301 | } | ||
302 | |||
303 | static void io_ctl_map_page(struct io_ctl *io_ctl, int clear) | ||
304 | { | ||
305 | WARN_ON(io_ctl->cur); | ||
306 | BUG_ON(io_ctl->index >= io_ctl->num_pages); | ||
307 | io_ctl->page = io_ctl->pages[io_ctl->index++]; | ||
308 | io_ctl->cur = kmap(io_ctl->page); | ||
309 | io_ctl->orig = io_ctl->cur; | ||
310 | io_ctl->size = PAGE_CACHE_SIZE; | ||
311 | if (clear) | ||
312 | memset(io_ctl->cur, 0, PAGE_CACHE_SIZE); | ||
313 | } | ||
314 | |||
315 | static void io_ctl_drop_pages(struct io_ctl *io_ctl) | ||
316 | { | ||
317 | int i; | ||
318 | |||
319 | io_ctl_unmap_page(io_ctl); | ||
320 | |||
321 | for (i = 0; i < io_ctl->num_pages; i++) { | ||
322 | ClearPageChecked(io_ctl->pages[i]); | ||
323 | unlock_page(io_ctl->pages[i]); | ||
324 | page_cache_release(io_ctl->pages[i]); | ||
325 | } | ||
326 | } | ||
327 | |||
328 | static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode, | ||
329 | int uptodate) | ||
330 | { | ||
331 | struct page *page; | ||
332 | gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); | ||
333 | int i; | ||
334 | |||
335 | for (i = 0; i < io_ctl->num_pages; i++) { | ||
336 | page = find_or_create_page(inode->i_mapping, i, mask); | ||
337 | if (!page) { | ||
338 | io_ctl_drop_pages(io_ctl); | ||
339 | return -ENOMEM; | ||
340 | } | ||
341 | io_ctl->pages[i] = page; | ||
342 | if (uptodate && !PageUptodate(page)) { | ||
343 | btrfs_readpage(NULL, page); | ||
344 | lock_page(page); | ||
345 | if (!PageUptodate(page)) { | ||
346 | printk(KERN_ERR "btrfs: error reading free " | ||
347 | "space cache\n"); | ||
348 | io_ctl_drop_pages(io_ctl); | ||
349 | return -EIO; | ||
350 | } | ||
351 | } | ||
352 | } | ||
353 | |||
354 | return 0; | ||
355 | } | ||
356 | |||
357 | static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation) | ||
358 | { | ||
359 | u64 *val; | ||
360 | |||
361 | io_ctl_map_page(io_ctl, 1); | ||
362 | |||
363 | /* | ||
364 | * Skip the csum areas. If we don't check crcs then we just have a | ||
365 | * 64bit chunk at the front of the first page. | ||
366 | */ | ||
367 | if (io_ctl->check_crcs) { | ||
368 | io_ctl->cur += (sizeof(u32) * io_ctl->num_pages); | ||
369 | io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); | ||
370 | } else { | ||
371 | io_ctl->cur += sizeof(u64); | ||
372 | io_ctl->size -= sizeof(u64) * 2; | ||
373 | } | ||
374 | |||
375 | val = io_ctl->cur; | ||
376 | *val = cpu_to_le64(generation); | ||
377 | io_ctl->cur += sizeof(u64); | ||
378 | } | ||
379 | |||
380 | static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation) | ||
381 | { | ||
382 | u64 *gen; | ||
383 | |||
384 | /* | ||
385 | * Skip the crc area. If we don't check crcs then we just have a 64bit | ||
386 | * chunk at the front of the first page. | ||
387 | */ | ||
388 | if (io_ctl->check_crcs) { | ||
389 | io_ctl->cur += sizeof(u32) * io_ctl->num_pages; | ||
390 | io_ctl->size -= sizeof(u64) + | ||
391 | (sizeof(u32) * io_ctl->num_pages); | ||
392 | } else { | ||
393 | io_ctl->cur += sizeof(u64); | ||
394 | io_ctl->size -= sizeof(u64) * 2; | ||
395 | } | ||
396 | |||
397 | gen = io_ctl->cur; | ||
398 | if (le64_to_cpu(*gen) != generation) { | ||
399 | printk_ratelimited(KERN_ERR "btrfs: space cache generation " | ||
400 | "(%Lu) does not match inode (%Lu)\n", *gen, | ||
401 | generation); | ||
402 | io_ctl_unmap_page(io_ctl); | ||
403 | return -EIO; | ||
404 | } | ||
405 | io_ctl->cur += sizeof(u64); | ||
406 | return 0; | ||
407 | } | ||
408 | |||
409 | static void io_ctl_set_crc(struct io_ctl *io_ctl, int index) | ||
410 | { | ||
411 | u32 *tmp; | ||
412 | u32 crc = ~(u32)0; | ||
413 | unsigned offset = 0; | ||
414 | |||
415 | if (!io_ctl->check_crcs) { | ||
416 | io_ctl_unmap_page(io_ctl); | ||
417 | return; | ||
418 | } | ||
419 | |||
420 | if (index == 0) | ||
421 | offset = sizeof(u32) * io_ctl->num_pages; ||
422 | |||
423 | crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc, | ||
424 | PAGE_CACHE_SIZE - offset); | ||
425 | btrfs_csum_final(crc, (char *)&crc); | ||
426 | io_ctl_unmap_page(io_ctl); | ||
427 | tmp = kmap(io_ctl->pages[0]); | ||
428 | tmp += index; | ||
429 | *tmp = crc; | ||
430 | kunmap(io_ctl->pages[0]); | ||
431 | } | ||
432 | |||
433 | static int io_ctl_check_crc(struct io_ctl *io_ctl, int index) | ||
434 | { | ||
435 | u32 *tmp, val; | ||
436 | u32 crc = ~(u32)0; | ||
437 | unsigned offset = 0; | ||
438 | |||
439 | if (!io_ctl->check_crcs) { | ||
440 | io_ctl_map_page(io_ctl, 0); | ||
441 | return 0; | ||
442 | } | ||
443 | |||
444 | if (index == 0) | ||
445 | offset = sizeof(u32) * io_ctl->num_pages; | ||
446 | |||
447 | tmp = kmap(io_ctl->pages[0]); | ||
448 | tmp += index; | ||
449 | val = *tmp; | ||
450 | kunmap(io_ctl->pages[0]); | ||
451 | |||
452 | io_ctl_map_page(io_ctl, 0); | ||
453 | crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc, | ||
454 | PAGE_CACHE_SIZE - offset); | ||
455 | btrfs_csum_final(crc, (char *)&crc); | ||
456 | if (val != crc) { | ||
457 | printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free " | ||
458 | "space cache\n"); | ||
459 | io_ctl_unmap_page(io_ctl); | ||
460 | return -EIO; | ||
461 | } | ||
462 | |||
463 | return 0; | ||
464 | } | ||
465 | |||
466 | static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes, | ||
467 | void *bitmap) | ||
468 | { | ||
469 | struct btrfs_free_space_entry *entry; | ||
470 | |||
471 | if (!io_ctl->cur) | ||
472 | return -ENOSPC; | ||
473 | |||
474 | entry = io_ctl->cur; | ||
475 | entry->offset = cpu_to_le64(offset); | ||
476 | entry->bytes = cpu_to_le64(bytes); | ||
477 | entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP : | ||
478 | BTRFS_FREE_SPACE_EXTENT; | ||
479 | io_ctl->cur += sizeof(struct btrfs_free_space_entry); | ||
480 | io_ctl->size -= sizeof(struct btrfs_free_space_entry); | ||
481 | |||
482 | if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) | ||
483 | return 0; | ||
484 | |||
485 | io_ctl_set_crc(io_ctl, io_ctl->index - 1); | ||
486 | |||
487 | /* No more pages to map */ | ||
488 | if (io_ctl->index >= io_ctl->num_pages) | ||
489 | return 0; | ||
490 | |||
491 | /* map the next page */ | ||
492 | io_ctl_map_page(io_ctl, 1); | ||
493 | return 0; | ||
494 | } | ||
495 | |||
496 | static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap) | ||
497 | { | ||
498 | if (!io_ctl->cur) | ||
499 | return -ENOSPC; | ||
500 | |||
501 | /* | ||
502 | * If we aren't at the start of the current page, unmap this one and | ||
503 | * map the next one if there is any left. | ||
504 | */ | ||
505 | if (io_ctl->cur != io_ctl->orig) { | ||
506 | io_ctl_set_crc(io_ctl, io_ctl->index - 1); | ||
507 | if (io_ctl->index >= io_ctl->num_pages) | ||
508 | return -ENOSPC; | ||
509 | io_ctl_map_page(io_ctl, 0); | ||
510 | } | ||
511 | |||
512 | memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE); | ||
513 | io_ctl_set_crc(io_ctl, io_ctl->index - 1); | ||
514 | if (io_ctl->index < io_ctl->num_pages) | ||
515 | io_ctl_map_page(io_ctl, 0); | ||
516 | return 0; | ||
517 | } | ||
518 | |||
519 | static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl) | ||
520 | { | ||
521 | /* | ||
522 | * If we're not on the boundary we know we've modified the page and we | ||
523 | * need to crc the page. | ||
524 | */ | ||
525 | if (io_ctl->cur != io_ctl->orig) | ||
526 | io_ctl_set_crc(io_ctl, io_ctl->index - 1); | ||
527 | else | ||
528 | io_ctl_unmap_page(io_ctl); | ||
529 | |||
530 | while (io_ctl->index < io_ctl->num_pages) { | ||
531 | io_ctl_map_page(io_ctl, 1); | ||
532 | io_ctl_set_crc(io_ctl, io_ctl->index - 1); | ||
533 | } | ||
534 | } | ||
535 | |||
536 | static int io_ctl_read_entry(struct io_ctl *io_ctl, | ||
537 | struct btrfs_free_space *entry, u8 *type) | ||
538 | { | ||
539 | struct btrfs_free_space_entry *e; | ||
540 | |||
541 | e = io_ctl->cur; | ||
542 | entry->offset = le64_to_cpu(e->offset); | ||
543 | entry->bytes = le64_to_cpu(e->bytes); | ||
544 | *type = e->type; | ||
545 | io_ctl->cur += sizeof(struct btrfs_free_space_entry); | ||
546 | io_ctl->size -= sizeof(struct btrfs_free_space_entry); | ||
547 | |||
548 | if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) | ||
549 | return 0; | ||
550 | |||
551 | io_ctl_unmap_page(io_ctl); | ||
552 | |||
553 | if (io_ctl->index >= io_ctl->num_pages) | ||
554 | return 0; | ||
555 | |||
556 | return io_ctl_check_crc(io_ctl, io_ctl->index); | ||
557 | } | ||
558 | |||
559 | static int io_ctl_read_bitmap(struct io_ctl *io_ctl, | ||
560 | struct btrfs_free_space *entry) | ||
561 | { | ||
562 | int ret; | ||
563 | |||
564 | if (io_ctl->cur && io_ctl->cur != io_ctl->orig) | ||
565 | io_ctl_unmap_page(io_ctl); | ||
566 | |||
567 | ret = io_ctl_check_crc(io_ctl, io_ctl->index); | ||
568 | if (ret) | ||
569 | return ret; | ||
570 | |||
571 | memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE); | ||
572 | io_ctl_unmap_page(io_ctl); | ||
573 | |||
574 | return 0; | ||
575 | } | ||
576 | |||
245 | int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | 577 | int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, |
246 | struct btrfs_free_space_ctl *ctl, | 578 | struct btrfs_free_space_ctl *ctl, |
247 | struct btrfs_path *path, u64 offset) | 579 | struct btrfs_path *path, u64 offset) |
248 | { | 580 | { |
249 | struct btrfs_free_space_header *header; | 581 | struct btrfs_free_space_header *header; |
250 | struct extent_buffer *leaf; | 582 | struct extent_buffer *leaf; |
251 | struct page *page; | 583 | struct io_ctl io_ctl; |
252 | struct btrfs_key key; | 584 | struct btrfs_key key; |
585 | struct btrfs_free_space *e, *n; | ||
253 | struct list_head bitmaps; | 586 | struct list_head bitmaps; |
254 | u64 num_entries; | 587 | u64 num_entries; |
255 | u64 num_bitmaps; | 588 | u64 num_bitmaps; |
256 | u64 generation; | 589 | u64 generation; |
257 | pgoff_t index = 0; | 590 | u8 type; |
258 | int ret = 0; | 591 | int ret = 0; |
259 | 592 | ||
260 | INIT_LIST_HEAD(&bitmaps); | 593 | INIT_LIST_HEAD(&bitmaps); |
261 | 594 | ||
262 | /* Nothing in the space cache, goodbye */ | 595 | /* Nothing in the space cache, goodbye */ |
263 | if (!i_size_read(inode)) | 596 | if (!i_size_read(inode)) |
264 | goto out; | 597 | return 0; |
265 | 598 | ||
266 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | 599 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; |
267 | key.offset = offset; | 600 | key.offset = offset; |
@@ -269,11 +602,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
269 | 602 | ||
270 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 603 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
271 | if (ret < 0) | 604 | if (ret < 0) |
272 | goto out; | 605 | return 0; |
273 | else if (ret > 0) { | 606 | else if (ret > 0) { |
274 | btrfs_release_path(path); | 607 | btrfs_release_path(path); |
275 | ret = 0; | 608 | return 0; |
276 | goto out; | ||
277 | } | 609 | } |
278 | 610 | ||
279 | ret = -1; | 611 | ret = -1; |
@@ -291,169 +623,100 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
291 | " not match free space cache generation (%llu)\n", | 623 | " not match free space cache generation (%llu)\n", |
292 | (unsigned long long)BTRFS_I(inode)->generation, | 624 | (unsigned long long)BTRFS_I(inode)->generation, |
293 | (unsigned long long)generation); | 625 | (unsigned long long)generation); |
294 | goto out; | 626 | return 0; |
295 | } | 627 | } |
296 | 628 | ||
297 | if (!num_entries) | 629 | if (!num_entries) |
298 | goto out; | 630 | return 0; |
299 | 631 | ||
632 | io_ctl_init(&io_ctl, inode, root); | ||
300 | ret = readahead_cache(inode); | 633 | ret = readahead_cache(inode); |
301 | if (ret) | 634 | if (ret) |
302 | goto out; | 635 | goto out; |
303 | 636 | ||
304 | while (1) { | 637 | ret = io_ctl_prepare_pages(&io_ctl, inode, 1); |
305 | struct btrfs_free_space_entry *entry; | 638 | if (ret) |
306 | struct btrfs_free_space *e; | 639 | goto out; |
307 | void *addr; | ||
308 | unsigned long offset = 0; | ||
309 | int need_loop = 0; | ||
310 | 640 | ||
311 | if (!num_entries && !num_bitmaps) | 641 | ret = io_ctl_check_crc(&io_ctl, 0); |
312 | break; | 642 | if (ret) |
643 | goto free_cache; | ||
313 | 644 | ||
314 | page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); | 645 | ret = io_ctl_check_generation(&io_ctl, generation); |
315 | if (!page) | 646 | if (ret) |
647 | goto free_cache; | ||
648 | |||
649 | while (num_entries) { | ||
650 | e = kmem_cache_zalloc(btrfs_free_space_cachep, | ||
651 | GFP_NOFS); | ||
652 | if (!e) | ||
316 | goto free_cache; | 653 | goto free_cache; |
317 | 654 | ||
318 | if (!PageUptodate(page)) { | 655 | ret = io_ctl_read_entry(&io_ctl, e, &type); |
319 | btrfs_readpage(NULL, page); | 656 | if (ret) { |
320 | lock_page(page); | 657 | kmem_cache_free(btrfs_free_space_cachep, e); |
321 | if (!PageUptodate(page)) { | 658 | goto free_cache; |
322 | unlock_page(page); | ||
323 | page_cache_release(page); | ||
324 | printk(KERN_ERR "btrfs: error reading free " | ||
325 | "space cache\n"); | ||
326 | goto free_cache; | ||
327 | } | ||
328 | } | 659 | } |
329 | addr = kmap(page); | ||
330 | 660 | ||
331 | if (index == 0) { | 661 | if (!e->bytes) { |
332 | u64 *gen; | 662 | kmem_cache_free(btrfs_free_space_cachep, e); |
663 | goto free_cache; | ||
664 | } | ||
333 | 665 | ||
334 | /* | 666 | if (type == BTRFS_FREE_SPACE_EXTENT) { |
335 | * We put a bogus crc in the front of the first page in | 667 | spin_lock(&ctl->tree_lock); |
336 | * case old kernels try to mount a fs with the new | 668 | ret = link_free_space(ctl, e); |
337 | * format to make sure they discard the cache. | 669 | spin_unlock(&ctl->tree_lock); |
338 | */ | 670 | if (ret) { |
339 | addr += sizeof(u64); | 671 | printk(KERN_ERR "Duplicate entries in " |
340 | offset += sizeof(u64); | 672 | "free space cache, dumping\n"); |
341 | 673 | kmem_cache_free(btrfs_free_space_cachep, e); | |
342 | gen = addr; | ||
343 | if (*gen != BTRFS_I(inode)->generation) { | ||
344 | printk(KERN_ERR "btrfs: space cache generation" | ||
345 | " (%llu) does not match inode (%llu)\n", | ||
346 | (unsigned long long)*gen, | ||
347 | (unsigned long long) | ||
348 | BTRFS_I(inode)->generation); | ||
349 | kunmap(page); | ||
350 | unlock_page(page); | ||
351 | page_cache_release(page); | ||
352 | goto free_cache; | 674 | goto free_cache; |
353 | } | 675 | } |
354 | addr += sizeof(u64); | 676 | } else { |
355 | offset += sizeof(u64); | 677 | BUG_ON(!num_bitmaps); |
356 | } | 678 | num_bitmaps--; |
357 | entry = addr; | 679 | e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); |
358 | 680 | if (!e->bitmap) { | |
359 | while (1) { | 681 | kmem_cache_free( |
360 | if (!num_entries) | 682 | btrfs_free_space_cachep, e); |
361 | break; | ||
362 | |||
363 | need_loop = 1; | ||
364 | e = kmem_cache_zalloc(btrfs_free_space_cachep, | ||
365 | GFP_NOFS); | ||
366 | if (!e) { | ||
367 | kunmap(page); | ||
368 | unlock_page(page); | ||
369 | page_cache_release(page); | ||
370 | goto free_cache; | 683 | goto free_cache; |
371 | } | 684 | } |
372 | 685 | spin_lock(&ctl->tree_lock); | |
373 | e->offset = le64_to_cpu(entry->offset); | 686 | ret = link_free_space(ctl, e); |
374 | e->bytes = le64_to_cpu(entry->bytes); | 687 | ctl->total_bitmaps++; |
375 | if (!e->bytes) { | 688 | ctl->op->recalc_thresholds(ctl); |
376 | kunmap(page); | 689 | spin_unlock(&ctl->tree_lock); |
690 | if (ret) { | ||
691 | printk(KERN_ERR "Duplicate entries in " | ||
692 | "free space cache, dumping\n"); | ||
377 | kmem_cache_free(btrfs_free_space_cachep, e); | 693 | kmem_cache_free(btrfs_free_space_cachep, e); |
378 | unlock_page(page); | ||
379 | page_cache_release(page); | ||
380 | goto free_cache; | 694 | goto free_cache; |
381 | } | 695 | } |
382 | 696 | list_add_tail(&e->list, &bitmaps); | |
383 | if (entry->type == BTRFS_FREE_SPACE_EXTENT) { | ||
384 | spin_lock(&ctl->tree_lock); | ||
385 | ret = link_free_space(ctl, e); | ||
386 | spin_unlock(&ctl->tree_lock); | ||
387 | if (ret) { | ||
388 | printk(KERN_ERR "Duplicate entries in " | ||
389 | "free space cache, dumping\n"); | ||
390 | kunmap(page); | ||
391 | unlock_page(page); | ||
392 | page_cache_release(page); | ||
393 | goto free_cache; | ||
394 | } | ||
395 | } else { | ||
396 | e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); | ||
397 | if (!e->bitmap) { | ||
398 | kunmap(page); | ||
399 | kmem_cache_free( | ||
400 | btrfs_free_space_cachep, e); | ||
401 | unlock_page(page); | ||
402 | page_cache_release(page); | ||
403 | goto free_cache; | ||
404 | } | ||
405 | spin_lock(&ctl->tree_lock); | ||
406 | ret = link_free_space(ctl, e); | ||
407 | ctl->total_bitmaps++; | ||
408 | ctl->op->recalc_thresholds(ctl); | ||
409 | spin_unlock(&ctl->tree_lock); | ||
410 | if (ret) { | ||
411 | printk(KERN_ERR "Duplicate entries in " | ||
412 | "free space cache, dumping\n"); | ||
413 | kunmap(page); | ||
414 | unlock_page(page); | ||
415 | page_cache_release(page); | ||
416 | goto free_cache; | ||
417 | } | ||
418 | list_add_tail(&e->list, &bitmaps); | ||
419 | } | ||
420 | |||
421 | num_entries--; | ||
422 | offset += sizeof(struct btrfs_free_space_entry); | ||
423 | if (offset + sizeof(struct btrfs_free_space_entry) >= | ||
424 | PAGE_CACHE_SIZE) | ||
425 | break; | ||
426 | entry++; | ||
427 | } | 697 | } |
428 | 698 | ||
429 | /* | 699 | num_entries--; |
430 | * We read an entry out of this page, we need to move on to the | 700 | } |
431 | * next page. | ||
432 | */ | ||
433 | if (need_loop) { | ||
434 | kunmap(page); | ||
435 | goto next; | ||
436 | } | ||
437 | 701 | ||
438 | /* | 702 | /* |
439 | * We add the bitmaps at the end of the entries in order that | 703 | * We add the bitmaps at the end of the entries in order that |
440 | * the bitmap entries are added to the cache. | 704 | * the bitmap entries are added to the cache. |
441 | */ | 705 | */ |
442 | e = list_entry(bitmaps.next, struct btrfs_free_space, list); | 706 | list_for_each_entry_safe(e, n, &bitmaps, list) { |
443 | list_del_init(&e->list); | 707 | list_del_init(&e->list); |
444 | memcpy(e->bitmap, addr, PAGE_CACHE_SIZE); | 708 | ret = io_ctl_read_bitmap(&io_ctl, e); |
445 | kunmap(page); | 709 | if (ret) |
446 | num_bitmaps--; | 710 | goto free_cache; |
447 | next: | ||
448 | unlock_page(page); | ||
449 | page_cache_release(page); | ||
450 | index++; | ||
451 | } | 711 | } |
452 | 712 | ||
713 | io_ctl_drop_pages(&io_ctl); | ||
453 | ret = 1; | 714 | ret = 1; |
454 | out: | 715 | out: |
716 | io_ctl_free(&io_ctl); | ||
455 | return ret; | 717 | return ret; |
456 | free_cache: | 718 | free_cache: |
719 | io_ctl_drop_pages(&io_ctl); | ||
457 | __btrfs_remove_free_space_cache(ctl); | 720 | __btrfs_remove_free_space_cache(ctl); |
458 | goto out; | 721 | goto out; |
459 | } | 722 | } |
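
Taken together, the io_ctl helpers added in this hunk define the new cache layout: when crcs are enabled (every cache except the free-ino cache), page 0 opens with one u32 crc slot per page followed by the u64 generation; each page is checksummed as a unit, with its crc written back into the table on page 0; plain entries pack until a page cannot hold another; and each bitmap occupies exactly one full page. Roughly, for an N-page cache:

    page 0:  [crc0][crc1]...[crcN-1][generation][entry][entry]...
    page 1:  [entry][entry]...                  (crc kept in slot 1 of page 0)
    ...
    page k:  [bitmap, one full PAGE_CACHE_SIZE] (crc kept in slot k of page 0)

so the first entry on page 0 begins sizeof(u32) * num_pages + sizeof(u64) bytes in, which is exactly the offset io_ctl_set_generation() skips.
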
@@ -465,7 +728,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, | |||
465 | struct btrfs_root *root = fs_info->tree_root; | 728 | struct btrfs_root *root = fs_info->tree_root; |
466 | struct inode *inode; | 729 | struct inode *inode; |
467 | struct btrfs_path *path; | 730 | struct btrfs_path *path; |
468 | int ret; | 731 | int ret = 0; |
469 | bool matched; | 732 | bool matched; |
470 | u64 used = btrfs_block_group_used(&block_group->item); | 733 | u64 used = btrfs_block_group_used(&block_group->item); |
471 | 734 | ||
@@ -497,6 +760,14 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, | |||
497 | return 0; | 760 | return 0; |
498 | } | 761 | } |
499 | 762 | ||
763 | /* We may have converted the inode and made the cache invalid. */ | ||
764 | spin_lock(&block_group->lock); | ||
765 | if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { | ||
766 | spin_unlock(&block_group->lock); | ||
767 | goto out; | ||
768 | } | ||
769 | spin_unlock(&block_group->lock); | ||
770 | |||
500 | ret = __load_free_space_cache(fs_info->tree_root, inode, ctl, | 771 | ret = __load_free_space_cache(fs_info->tree_root, inode, ctl, |
501 | path, block_group->key.objectid); | 772 | path, block_group->key.objectid); |
502 | btrfs_free_path(path); | 773 | btrfs_free_path(path); |
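
The re-check matters because lookup_free_space_inode() may have just converted an old-style inode and set BTRFS_DC_CLEAR, in which case the bits on disk are stale and must not be loaded. In other words (my reading of the code above):

    /* Invariant assumed by the loader: only a cache whose state is
     * BTRFS_DC_WRITTEN is trusted on disk; anything that invalidates
     * the cache flips the state away from BTRFS_DC_WRITTEN under
     * block_group->lock before the loader can observe it. */
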
@@ -530,6 +801,19 @@ out: | |||
530 | return ret; | 801 | return ret; |
531 | } | 802 | } |
532 | 803 | ||
804 | /** | ||
805 | * __btrfs_write_out_cache - write out cached info to an inode | ||
806 | * @root - the root the inode belongs to | ||
807 | * @ctl - the free space cache we are going to write out | ||
808 | * @block_group - the block_group for this cache if it belongs to a block_group | ||
809 | * @trans - the trans handle | ||
810 | * @path - the path to use | ||
811 | * @offset - the offset for the key we'll insert | ||
812 | * | ||
813 | * This function writes out a free space cache struct to disk for quick recovery | ||
814 | * on mount. This will return 0 if it was successful in writing the cache out, ||
815 | * and -1 if it was not. | ||
816 | */ | ||
533 | int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | 817 | int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, |
534 | struct btrfs_free_space_ctl *ctl, | 818 | struct btrfs_free_space_ctl *ctl, |
535 | struct btrfs_block_group_cache *block_group, | 819 | struct btrfs_block_group_cache *block_group, |
@@ -540,42 +824,24 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
540 | struct extent_buffer *leaf; | 824 | struct extent_buffer *leaf; |
541 | struct rb_node *node; | 825 | struct rb_node *node; |
542 | struct list_head *pos, *n; | 826 | struct list_head *pos, *n; |
543 | struct page **pages; | ||
544 | struct page *page; | ||
545 | struct extent_state *cached_state = NULL; | 827 | struct extent_state *cached_state = NULL; |
546 | struct btrfs_free_cluster *cluster = NULL; | 828 | struct btrfs_free_cluster *cluster = NULL; |
547 | struct extent_io_tree *unpin = NULL; | 829 | struct extent_io_tree *unpin = NULL; |
830 | struct io_ctl io_ctl; | ||
548 | struct list_head bitmap_list; | 831 | struct list_head bitmap_list; |
549 | struct btrfs_key key; | 832 | struct btrfs_key key; |
550 | u64 start, end, len; | 833 | u64 start, end, len; |
551 | u64 bytes = 0; | ||
552 | u32 crc = ~(u32)0; | ||
553 | int index = 0, num_pages = 0; | ||
554 | int entries = 0; | 834 | int entries = 0; |
555 | int bitmaps = 0; | 835 | int bitmaps = 0; |
556 | int ret = -1; | 836 | int ret; |
557 | bool next_page = false; | 837 | int err = -1; |
558 | bool out_of_space = false; | ||
559 | 838 | ||
560 | INIT_LIST_HEAD(&bitmap_list); | 839 | INIT_LIST_HEAD(&bitmap_list); |
561 | 840 | ||
562 | node = rb_first(&ctl->free_space_offset); | ||
563 | if (!node) | ||
564 | return 0; | ||
565 | |||
566 | if (!i_size_read(inode)) | 841 | if (!i_size_read(inode)) |
567 | return -1; | 842 | return -1; |
568 | 843 | ||
569 | num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> | 844 | io_ctl_init(&io_ctl, inode, root); |
570 | PAGE_CACHE_SHIFT; | ||
571 | |||
572 | filemap_write_and_wait(inode->i_mapping); | ||
573 | btrfs_wait_ordered_range(inode, inode->i_size & | ||
574 | ~(root->sectorsize - 1), (u64)-1); | ||
575 | |||
576 | pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); | ||
577 | if (!pages) | ||
578 | return -1; | ||
579 | 845 | ||
580 | /* Get the cluster for this block_group if it exists */ | 846 | /* Get the cluster for this block_group if it exists */ |
581 | if (block_group && !list_empty(&block_group->cluster_list)) | 847 | if (block_group && !list_empty(&block_group->cluster_list)) |
@@ -589,30 +855,9 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
589 | */ | 855 | */ |
590 | unpin = root->fs_info->pinned_extents; | 856 | unpin = root->fs_info->pinned_extents; |
591 | 857 | ||
592 | /* | 858 | /* Lock all pages first so we can lock the extent safely. */ |
593 | * Lock all pages first so we can lock the extent safely. | 859 | io_ctl_prepare_pages(&io_ctl, inode, 0); |
594 | * | ||
595 | * NOTE: Because we hold the ref the entire time we're going to write to | ||
596 | * the page find_get_page should never fail, so we don't do a check | ||
597 | * after find_get_page at this point. Just putting this here so people | ||
598 | * know and don't freak out. | ||
599 | */ | ||
600 | while (index < num_pages) { | ||
601 | page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); | ||
602 | if (!page) { | ||
603 | int i; | ||
604 | |||
605 | for (i = 0; i < num_pages; i++) { | ||
606 | unlock_page(pages[i]); | ||
607 | page_cache_release(pages[i]); | ||
608 | } | ||
609 | goto out; | ||
610 | } | ||
611 | pages[index] = page; | ||
612 | index++; | ||
613 | } | ||
614 | 860 | ||
615 | index = 0; | ||
616 | lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, | 861 | lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, |
617 | 0, &cached_state, GFP_NOFS); | 862 | 0, &cached_state, GFP_NOFS); |
618 | 863 | ||
@@ -623,189 +868,111 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
623 | if (block_group) | 868 | if (block_group) |
624 | start = block_group->key.objectid; | 869 | start = block_group->key.objectid; |
625 | 870 | ||
626 | /* Write out the extent entries */ | 871 | node = rb_first(&ctl->free_space_offset); |
627 | do { | 872 | if (!node && cluster) { |
628 | struct btrfs_free_space_entry *entry; | 873 | node = rb_first(&cluster->root); |
629 | void *addr, *orig; | 874 | cluster = NULL; |
630 | unsigned long offset = 0; | 875 | } |
631 | 876 | ||
632 | next_page = false; | 877 | /* Make sure we can fit our crcs into the first page */ |
878 | if (io_ctl.check_crcs && | ||
879 | (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) { | ||
880 | WARN_ON(1); | ||
881 | goto out_nospc; | ||
882 | } | ||
633 | 883 | ||
634 | if (index >= num_pages) { | 884 | io_ctl_set_generation(&io_ctl, trans->transid); |
635 | out_of_space = true; | ||
636 | break; | ||
637 | } | ||
638 | 885 | ||
639 | page = pages[index]; | 886 | /* Write out the extent entries */ |
887 | while (node) { | ||
888 | struct btrfs_free_space *e; | ||
640 | 889 | ||
641 | orig = addr = kmap(page); | 890 | e = rb_entry(node, struct btrfs_free_space, offset_index); |
642 | if (index == 0) { | 891 | entries++; |
643 | u64 *gen; | ||
644 | 892 | ||
645 | /* | 893 | ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes, |
646 | * We're going to put in a bogus crc for this page to | 894 | e->bitmap); |
647 | * make sure that old kernels who aren't aware of this | 895 | if (ret) |
648 | * format will be sure to discard the cache. | 896 | goto out_nospc; |
649 | */ | ||
650 | addr += sizeof(u64); | ||
651 | offset += sizeof(u64); | ||
652 | 897 | ||
653 | gen = addr; | 898 | if (e->bitmap) { |
654 | *gen = trans->transid; | 899 | list_add_tail(&e->list, &bitmap_list); |
655 | addr += sizeof(u64); | 900 | bitmaps++; |
656 | offset += sizeof(u64); | ||
657 | } | 901 | } |
658 | entry = addr; | 902 | node = rb_next(node); |
659 | 903 | if (!node && cluster) { | |
660 | memset(addr, 0, PAGE_CACHE_SIZE - offset); | 904 | node = rb_first(&cluster->root); |
661 | while (node && !next_page) { | 905 | cluster = NULL; |
662 | struct btrfs_free_space *e; | ||
663 | |||
664 | e = rb_entry(node, struct btrfs_free_space, offset_index); | ||
665 | entries++; | ||
666 | |||
667 | entry->offset = cpu_to_le64(e->offset); | ||
668 | entry->bytes = cpu_to_le64(e->bytes); | ||
669 | if (e->bitmap) { | ||
670 | entry->type = BTRFS_FREE_SPACE_BITMAP; | ||
671 | list_add_tail(&e->list, &bitmap_list); | ||
672 | bitmaps++; | ||
673 | } else { | ||
674 | entry->type = BTRFS_FREE_SPACE_EXTENT; | ||
675 | } | ||
676 | node = rb_next(node); | ||
677 | if (!node && cluster) { | ||
678 | node = rb_first(&cluster->root); | ||
679 | cluster = NULL; | ||
680 | } | ||
681 | offset += sizeof(struct btrfs_free_space_entry); | ||
682 | if (offset + sizeof(struct btrfs_free_space_entry) >= | ||
683 | PAGE_CACHE_SIZE) | ||
684 | next_page = true; | ||
685 | entry++; | ||
686 | } | 906 | } |
907 | } | ||
687 | 908 | ||
688 | /* | 909 | /* |
689 | * We want to add any pinned extents to our free space cache | 910 | * We want to add any pinned extents to our free space cache |
690 | * so we don't leak the space | 911 | * so we don't leak the space |
691 | */ | 912 | */ |
692 | while (block_group && !next_page && | 913 | while (block_group && (start < block_group->key.objectid + |
693 | (start < block_group->key.objectid + | 914 | block_group->key.offset)) { |
694 | block_group->key.offset)) { | 915 | ret = find_first_extent_bit(unpin, start, &start, &end, |
695 | ret = find_first_extent_bit(unpin, start, &start, &end, | 916 | EXTENT_DIRTY); |
696 | EXTENT_DIRTY); | 917 | if (ret) { |
697 | if (ret) { | 918 | ret = 0; |
698 | ret = 0; | 919 | break; |
699 | break; | ||
700 | } | ||
701 | |||
702 | /* This pinned extent is out of our range */ | ||
703 | if (start >= block_group->key.objectid + | ||
704 | block_group->key.offset) | ||
705 | break; | ||
706 | |||
707 | len = block_group->key.objectid + | ||
708 | block_group->key.offset - start; | ||
709 | len = min(len, end + 1 - start); | ||
710 | |||
711 | entries++; | ||
712 | entry->offset = cpu_to_le64(start); | ||
713 | entry->bytes = cpu_to_le64(len); | ||
714 | entry->type = BTRFS_FREE_SPACE_EXTENT; | ||
715 | |||
716 | start = end + 1; | ||
717 | offset += sizeof(struct btrfs_free_space_entry); | ||
718 | if (offset + sizeof(struct btrfs_free_space_entry) >= | ||
719 | PAGE_CACHE_SIZE) | ||
720 | next_page = true; | ||
721 | entry++; | ||
722 | } | 920 | } |
723 | 921 | ||
724 | /* Generate bogus crc value */ | 922 | /* This pinned extent is out of our range */ |
725 | if (index == 0) { | 923 | if (start >= block_group->key.objectid + |
726 | u32 *tmp; | 924 | block_group->key.offset) |
727 | crc = btrfs_csum_data(root, orig + sizeof(u64), crc, | 925 | break; |
728 | PAGE_CACHE_SIZE - sizeof(u64)); | ||
729 | btrfs_csum_final(crc, (char *)&crc); | ||
730 | crc++; | ||
731 | tmp = orig; | ||
732 | *tmp = crc; | ||
733 | } | ||
734 | 926 | ||
735 | kunmap(page); | 927 | len = block_group->key.objectid + |
928 | block_group->key.offset - start; | ||
929 | len = min(len, end + 1 - start); | ||
736 | 930 | ||
737 | bytes += PAGE_CACHE_SIZE; | 931 | entries++; |
932 | ret = io_ctl_add_entry(&io_ctl, start, len, NULL); | ||
933 | if (ret) | ||
934 | goto out_nospc; | ||
738 | 935 | ||
739 | index++; | 936 | start = end + 1; |
740 | } while (node || next_page); | 937 | } |
741 | 938 | ||
742 | /* Write out the bitmaps */ | 939 | /* Write out the bitmaps */ |
743 | list_for_each_safe(pos, n, &bitmap_list) { | 940 | list_for_each_safe(pos, n, &bitmap_list) { |
744 | void *addr; | ||
745 | struct btrfs_free_space *entry = | 941 | struct btrfs_free_space *entry = |
746 | list_entry(pos, struct btrfs_free_space, list); | 942 | list_entry(pos, struct btrfs_free_space, list); |
747 | 943 | ||
748 | if (index >= num_pages) { | 944 | ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap); |
749 | out_of_space = true; | 945 | if (ret) |
750 | break; | 946 | goto out_nospc; |
751 | } | ||
752 | page = pages[index]; | ||
753 | |||
754 | addr = kmap(page); | ||
755 | memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); | ||
756 | kunmap(page); | ||
757 | bytes += PAGE_CACHE_SIZE; | ||
758 | |||
759 | list_del_init(&entry->list); | 947 | list_del_init(&entry->list); |
760 | index++; | ||
761 | } | ||
762 | |||
763 | if (out_of_space) { | ||
764 | btrfs_drop_pages(pages, num_pages); | ||
765 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, | ||
766 | i_size_read(inode) - 1, &cached_state, | ||
767 | GFP_NOFS); | ||
768 | ret = 0; | ||
769 | goto out; | ||
770 | } | 948 | } |
771 | 949 | ||
772 | /* Zero out the rest of the pages just to make sure */ | 950 | /* Zero out the rest of the pages just to make sure */ |
773 | while (index < num_pages) { | 951 | io_ctl_zero_remaining_pages(&io_ctl); |
774 | void *addr; | ||
775 | |||
776 | page = pages[index]; | ||
777 | addr = kmap(page); | ||
778 | memset(addr, 0, PAGE_CACHE_SIZE); | ||
779 | kunmap(page); | ||
780 | bytes += PAGE_CACHE_SIZE; | ||
781 | index++; | ||
782 | } | ||
783 | 952 | ||
784 | ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0, | 953 | ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages, |
785 | bytes, &cached_state); | 954 | 0, i_size_read(inode), &cached_state); |
786 | btrfs_drop_pages(pages, num_pages); | 955 | io_ctl_drop_pages(&io_ctl); |
787 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, | 956 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, |
788 | i_size_read(inode) - 1, &cached_state, GFP_NOFS); | 957 | i_size_read(inode) - 1, &cached_state, GFP_NOFS); |
789 | 958 | ||
790 | if (ret) { | 959 | if (ret) |
791 | ret = 0; | ||
792 | goto out; | 960 | goto out; |
793 | } | ||
794 | 961 | ||
795 | BTRFS_I(inode)->generation = trans->transid; | ||
796 | 962 | ||
797 | filemap_write_and_wait(inode->i_mapping); | 963 | ret = filemap_write_and_wait(inode->i_mapping); |
964 | if (ret) | ||
965 | goto out; | ||
798 | 966 | ||
799 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | 967 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; |
800 | key.offset = offset; | 968 | key.offset = offset; |
801 | key.type = 0; | 969 | key.type = 0; |
802 | 970 | ||
803 | ret = btrfs_search_slot(trans, root, &key, path, 1, 1); | 971 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); |
804 | if (ret < 0) { | 972 | if (ret < 0) { |
805 | ret = -1; | 973 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, |
806 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, | 974 | EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, |
807 | EXTENT_DIRTY | EXTENT_DELALLOC | | 975 | GFP_NOFS); |
808 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); | ||
809 | goto out; | 976 | goto out; |
810 | } | 977 | } |
811 | leaf = path->nodes[0]; | 978 | leaf = path->nodes[0]; |
@@ -816,15 +983,16 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
816 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 983 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
817 | if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || | 984 | if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || |
818 | found_key.offset != offset) { | 985 | found_key.offset != offset) { |
819 | ret = -1; | 986 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, |
820 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, | 987 | inode->i_size - 1, |
821 | EXTENT_DIRTY | EXTENT_DELALLOC | | 988 | EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, |
822 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, | 989 | NULL, GFP_NOFS); |
823 | GFP_NOFS); | ||
824 | btrfs_release_path(path); | 990 | btrfs_release_path(path); |
825 | goto out; | 991 | goto out; |
826 | } | 992 | } |
827 | } | 993 | } |
994 | |||
995 | BTRFS_I(inode)->generation = trans->transid; | ||
828 | header = btrfs_item_ptr(leaf, path->slots[0], | 996 | header = btrfs_item_ptr(leaf, path->slots[0], |
829 | struct btrfs_free_space_header); | 997 | struct btrfs_free_space_header); |
830 | btrfs_set_free_space_entries(leaf, header, entries); | 998 | btrfs_set_free_space_entries(leaf, header, entries); |
@@ -833,16 +1001,26 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
833 | btrfs_mark_buffer_dirty(leaf); | 1001 | btrfs_mark_buffer_dirty(leaf); |
834 | btrfs_release_path(path); | 1002 | btrfs_release_path(path); |
835 | 1003 | ||
836 | ret = 1; | 1004 | err = 0; |
837 | |||
838 | out: | 1005 | out: |
839 | kfree(pages); | 1006 | io_ctl_free(&io_ctl); |
840 | if (ret != 1) { | 1007 | if (err) { |
841 | invalidate_inode_pages2_range(inode->i_mapping, 0, index); | 1008 | invalidate_inode_pages2(inode->i_mapping); |
842 | BTRFS_I(inode)->generation = 0; | 1009 | BTRFS_I(inode)->generation = 0; |
843 | } | 1010 | } |
844 | btrfs_update_inode(trans, root, inode); | 1011 | btrfs_update_inode(trans, root, inode); |
845 | return ret; | 1012 | return err; |
1013 | |||
1014 | out_nospc: | ||
1015 | list_for_each_safe(pos, n, &bitmap_list) { | ||
1016 | struct btrfs_free_space *entry = | ||
1017 | list_entry(pos, struct btrfs_free_space, list); | ||
1018 | list_del_init(&entry->list); | ||
1019 | } | ||
1020 | io_ctl_drop_pages(&io_ctl); | ||
1021 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, | ||
1022 | i_size_read(inode) - 1, &cached_state, GFP_NOFS); | ||
1023 | goto out; | ||
846 | } | 1024 | } |
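
For orientation, the rewritten writer makes one pass over the io_ctl instead of the old page-at-a-time loop. The sequence of the function above, as a comment-only outline:

    /* __btrfs_write_out_cache(), condensed:
     * 1. io_ctl_init() + io_ctl_prepare_pages()  -- pin and lock cache pages
     * 2. lock_extent_bits()                      -- fence off readers
     * 3. io_ctl_set_generation(trans->transid)
     * 4. walk the free-space tree, then the cluster: io_ctl_add_entry(),
     *    queueing bitmap entries on bitmap_list for later
     * 5. append pinned extents in the block group's range as plain extents
     * 6. drain bitmap_list via io_ctl_add_bitmap() -- one page per bitmap
     * 7. io_ctl_zero_remaining_pages(), btrfs_dirty_pages(), wait for I/O
     * 8. rewrite the free-space header item (entries, bitmaps, generation)
     * Any -ENOSPC jumps to out_nospc, which unwinds the bitmap list and
     * pages and then takes the failure path: cache invalidated, the
     * inode's generation zeroed, -1 returned.
     */
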
847 | 1025 | ||
848 | int btrfs_write_out_cache(struct btrfs_root *root, | 1026 | int btrfs_write_out_cache(struct btrfs_root *root, |
@@ -869,14 +1047,15 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
869 | 1047 | ||
870 | ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans, | 1048 | ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans, |
871 | path, block_group->key.objectid); | 1049 | path, block_group->key.objectid); |
872 | if (ret < 0) { | 1050 | if (ret) { |
873 | spin_lock(&block_group->lock); | 1051 | spin_lock(&block_group->lock); |
874 | block_group->disk_cache_state = BTRFS_DC_ERROR; | 1052 | block_group->disk_cache_state = BTRFS_DC_ERROR; |
875 | spin_unlock(&block_group->lock); | 1053 | spin_unlock(&block_group->lock); |
876 | ret = 0; | 1054 | ret = 0; |
877 | 1055 | #ifdef DEBUG | |
878 | printk(KERN_ERR "btrfs: failed to write free space cache " | 1056 | printk(KERN_ERR "btrfs: failed to write free space cache " |
879 | "for block group %llu\n", block_group->key.objectid); | 1057 | "for block group %llu\n", block_group->key.objectid); |
1058 | #endif | ||
880 | } | 1059 | } |
881 | 1060 | ||
882 | iput(inode); | 1061 | iput(inode); |
@@ -1701,6 +1880,7 @@ again: | |||
1701 | ctl->total_bitmaps--; | 1880 | ctl->total_bitmaps--; |
1702 | } | 1881 | } |
1703 | kmem_cache_free(btrfs_free_space_cachep, info); | 1882 | kmem_cache_free(btrfs_free_space_cachep, info); |
1883 | ret = 0; | ||
1704 | goto out_lock; | 1884 | goto out_lock; |
1705 | } | 1885 | } |
1706 | 1886 | ||
@@ -1708,7 +1888,8 @@ again: | |||
1708 | unlink_free_space(ctl, info); | 1888 | unlink_free_space(ctl, info); |
1709 | info->offset += bytes; | 1889 | info->offset += bytes; |
1710 | info->bytes -= bytes; | 1890 | info->bytes -= bytes; |
1711 | link_free_space(ctl, info); | 1891 | ret = link_free_space(ctl, info); |
1892 | WARN_ON(ret); | ||
1712 | goto out_lock; | 1893 | goto out_lock; |
1713 | } | 1894 | } |
1714 | 1895 | ||
@@ -2472,9 +2653,19 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | |||
2472 | spin_unlock(&ctl->tree_lock); | 2653 | spin_unlock(&ctl->tree_lock); |
2473 | 2654 | ||
2474 | if (bytes >= minlen) { | 2655 | if (bytes >= minlen) { |
2475 | int update_ret; | 2656 | struct btrfs_space_info *space_info; |
2476 | update_ret = btrfs_update_reserved_bytes(block_group, | 2657 | int update = 0; |
2477 | bytes, 1, 1); | 2658 | |
2659 | space_info = block_group->space_info; | ||
2660 | spin_lock(&space_info->lock); | ||
2661 | spin_lock(&block_group->lock); | ||
2662 | if (!block_group->ro) { | ||
2663 | block_group->reserved += bytes; | ||
2664 | space_info->bytes_reserved += bytes; | ||
2665 | update = 1; | ||
2666 | } | ||
2667 | spin_unlock(&block_group->lock); | ||
2668 | spin_unlock(&space_info->lock); | ||
2478 | 2669 | ||
2479 | ret = btrfs_error_discard_extent(fs_info->extent_root, | 2670 | ret = btrfs_error_discard_extent(fs_info->extent_root, |
2480 | start, | 2671 | start, |
@@ -2482,9 +2673,16 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | |||
2482 | &actually_trimmed); | 2673 | &actually_trimmed); |
2483 | 2674 | ||
2484 | btrfs_add_free_space(block_group, start, bytes); | 2675 | btrfs_add_free_space(block_group, start, bytes); |
2485 | if (!update_ret) | 2676 | if (update) { |
2486 | btrfs_update_reserved_bytes(block_group, | 2677 | spin_lock(&space_info->lock); |
2487 | bytes, 0, 1); | 2678 | spin_lock(&block_group->lock); |
2679 | if (block_group->ro) | ||
2680 | space_info->bytes_readonly += bytes; | ||
2681 | block_group->reserved -= bytes; | ||
2682 | space_info->bytes_reserved -= bytes; | ||
2683 | spin_unlock(&space_info->lock); | ||
2684 | spin_unlock(&block_group->lock); | ||
2685 | } | ||
2488 | 2686 | ||
2489 | if (ret) | 2687 | if (ret) |
2490 | break; | 2688 | break; |
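
The trim path drops the btrfs_update_reserved_bytes() round trip and open-codes the accounting, taking space_info->lock before block_group->lock on both legs and re-checking ->ro after the discard so bytes freed into a group that went read-only in the meantime are credited to bytes_readonly. In outline:

    /* Same lock order, space_info then block_group, on both legs. */
    spin_lock(&space_info->lock);
    spin_lock(&block_group->lock);
    /* before the discard: reserved += bytes (only if !ro);
     * after the discard:  reserved -= bytes, and if the group is now
     * read-only the bytes land in bytes_readonly instead. */
    spin_unlock(&block_group->lock);
    spin_unlock(&space_info->lock);
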
@@ -2643,9 +2841,13 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root, | |||
2643 | return 0; | 2841 | return 0; |
2644 | 2842 | ||
2645 | ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0); | 2843 | ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0); |
2646 | if (ret < 0) | 2844 | if (ret) { |
2845 | btrfs_delalloc_release_metadata(inode, inode->i_size); | ||
2846 | #ifdef DEBUG | ||
2647 | printk(KERN_ERR "btrfs: failed to write free ino cache " | 2847 | printk(KERN_ERR "btrfs: failed to write free ino cache " |
2648 | "for root %llu\n", root->root_key.objectid); | 2848 | "for root %llu\n", root->root_key.objectid); |
2849 | #endif | ||
2850 | } | ||
2649 | 2851 | ||
2650 | iput(inode); | 2852 | iput(inode); |
2651 | return ret; | 2853 | return ret; |
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index b4087e0fa871..53dcbdf446cd 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
@@ -465,14 +465,16 @@ again: | |||
465 | /* Just to make sure we have enough space */ | 465 | /* Just to make sure we have enough space */ |
466 | prealloc += 8 * PAGE_CACHE_SIZE; | 466 | prealloc += 8 * PAGE_CACHE_SIZE; |
467 | 467 | ||
468 | ret = btrfs_check_data_free_space(inode, prealloc); | 468 | ret = btrfs_delalloc_reserve_space(inode, prealloc); |
469 | if (ret) | 469 | if (ret) |
470 | goto out_put; | 470 | goto out_put; |
471 | 471 | ||
472 | ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, | 472 | ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, |
473 | prealloc, prealloc, &alloc_hint); | 473 | prealloc, prealloc, &alloc_hint); |
474 | if (ret) | 474 | if (ret) { |
475 | btrfs_delalloc_release_space(inode, prealloc); | ||
475 | goto out_put; | 476 | goto out_put; |
477 | } | ||
476 | btrfs_free_reserved_data_space(inode, prealloc); | 478 | btrfs_free_reserved_data_space(inode, prealloc); |
477 | 479 | ||
478 | out_put: | 480 | out_put: |
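
The inode-map hunk swaps the data-only btrfs_check_data_free_space() for btrfs_delalloc_reserve_space(), which reserves data and metadata together, so the error path must return both halves while the success path hands back only the data portion. The pairing, condensed from the hunk above (my reading of which half each call releases):

    ret = btrfs_delalloc_reserve_space(inode, prealloc);   /* data + metadata */
    if (ret)
            goto out_put;
    ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
                                          prealloc, prealloc, &alloc_hint);
    if (ret) {
            btrfs_delalloc_release_space(inode, prealloc);  /* undo both */
            goto out_put;
    }
    btrfs_free_reserved_data_space(inode, prealloc);        /* data half only */
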
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9327f45434e8..9d0eaa57d4ee 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -393,7 +393,10 @@ again: | |||
393 | (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) { | 393 | (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) { |
394 | WARN_ON(pages); | 394 | WARN_ON(pages); |
395 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); | 395 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); |
396 | BUG_ON(!pages); | 396 | if (!pages) { |
397 | /* just bail out to the uncompressed code */ | ||
398 | goto cont; | ||
399 | } | ||
397 | 400 | ||
398 | if (BTRFS_I(inode)->force_compress) | 401 | if (BTRFS_I(inode)->force_compress) |
399 | compress_type = BTRFS_I(inode)->force_compress; | 402 | compress_type = BTRFS_I(inode)->force_compress; |
@@ -424,6 +427,7 @@ again: | |||
424 | will_compress = 1; | 427 | will_compress = 1; |
425 | } | 428 | } |
426 | } | 429 | } |
430 | cont: | ||
427 | if (start == 0) { | 431 | if (start == 0) { |
428 | trans = btrfs_join_transaction(root); | 432 | trans = btrfs_join_transaction(root); |
429 | BUG_ON(IS_ERR(trans)); | 433 | BUG_ON(IS_ERR(trans)); |
@@ -820,7 +824,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
820 | } | 824 | } |
821 | 825 | ||
822 | BUG_ON(disk_num_bytes > | 826 | BUG_ON(disk_num_bytes > |
823 | btrfs_super_total_bytes(&root->fs_info->super_copy)); | 827 | btrfs_super_total_bytes(root->fs_info->super_copy)); |
824 | 828 | ||
825 | alloc_hint = get_extent_allocation_hint(inode, start, num_bytes); | 829 | alloc_hint = get_extent_allocation_hint(inode, start, num_bytes); |
826 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); | 830 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); |
@@ -1792,12 +1796,12 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1792 | } | 1796 | } |
1793 | ret = 0; | 1797 | ret = 0; |
1794 | out: | 1798 | out: |
1795 | if (nolock) { | 1799 | if (root != root->fs_info->tree_root) |
1796 | if (trans) | ||
1797 | btrfs_end_transaction_nolock(trans, root); | ||
1798 | } else { | ||
1799 | btrfs_delalloc_release_metadata(inode, ordered_extent->len); | 1800 | btrfs_delalloc_release_metadata(inode, ordered_extent->len); |
1800 | if (trans) | 1801 | if (trans) { |
1802 | if (nolock) | ||
1803 | btrfs_end_transaction_nolock(trans, root); | ||
1804 | else | ||
1801 | btrfs_end_transaction(trans, root); | 1805 | btrfs_end_transaction(trans, root); |
1802 | } | 1806 | } |
1803 | 1807 | ||
@@ -1931,89 +1935,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) | |||
1931 | up_read(&root->fs_info->cleanup_work_sem); | 1935 | up_read(&root->fs_info->cleanup_work_sem); |
1932 | } | 1936 | } |
1933 | 1937 | ||
1934 | /* | ||
1935 | * calculate extra metadata reservation when snapshotting a subvolume | ||
1936 | * contains orphan files. | ||
1937 | */ | ||
1938 | void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, | ||
1939 | struct btrfs_pending_snapshot *pending, | ||
1940 | u64 *bytes_to_reserve) | ||
1941 | { | ||
1942 | struct btrfs_root *root; | ||
1943 | struct btrfs_block_rsv *block_rsv; | ||
1944 | u64 num_bytes; | ||
1945 | int index; | ||
1946 | |||
1947 | root = pending->root; | ||
1948 | if (!root->orphan_block_rsv || list_empty(&root->orphan_list)) | ||
1949 | return; | ||
1950 | |||
1951 | block_rsv = root->orphan_block_rsv; | ||
1952 | |||
1953 | /* orphan block reservation for the snapshot */ | ||
1954 | num_bytes = block_rsv->size; | ||
1955 | |||
1956 | /* | ||
1957 | * after the snapshot is created, COWing tree blocks may use more | ||
1958 | * space than it frees. So we should make sure there is enough | ||
1959 | * reserved space. | ||
1960 | */ | ||
1961 | index = trans->transid & 0x1; | ||
1962 | if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) { | ||
1963 | num_bytes += block_rsv->size - | ||
1964 | (block_rsv->reserved + block_rsv->freed[index]); | ||
1965 | } | ||
1966 | |||
1967 | *bytes_to_reserve += num_bytes; | ||
1968 | } | ||
1969 | |||
1970 | void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, | ||
1971 | struct btrfs_pending_snapshot *pending) | ||
1972 | { | ||
1973 | struct btrfs_root *root = pending->root; | ||
1974 | struct btrfs_root *snap = pending->snap; | ||
1975 | struct btrfs_block_rsv *block_rsv; | ||
1976 | u64 num_bytes; | ||
1977 | int index; | ||
1978 | int ret; | ||
1979 | |||
1980 | if (!root->orphan_block_rsv || list_empty(&root->orphan_list)) | ||
1981 | return; | ||
1982 | |||
1983 | /* refill source subvolume's orphan block reservation */ | ||
1984 | block_rsv = root->orphan_block_rsv; | ||
1985 | index = trans->transid & 0x1; | ||
1986 | if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) { | ||
1987 | num_bytes = block_rsv->size - | ||
1988 | (block_rsv->reserved + block_rsv->freed[index]); | ||
1989 | ret = btrfs_block_rsv_migrate(&pending->block_rsv, | ||
1990 | root->orphan_block_rsv, | ||
1991 | num_bytes); | ||
1992 | BUG_ON(ret); | ||
1993 | } | ||
1994 | |||
1995 | /* setup orphan block reservation for the snapshot */ | ||
1996 | block_rsv = btrfs_alloc_block_rsv(snap); | ||
1997 | BUG_ON(!block_rsv); | ||
1998 | |||
1999 | btrfs_add_durable_block_rsv(root->fs_info, block_rsv); | ||
2000 | snap->orphan_block_rsv = block_rsv; | ||
2001 | |||
2002 | num_bytes = root->orphan_block_rsv->size; | ||
2003 | ret = btrfs_block_rsv_migrate(&pending->block_rsv, | ||
2004 | block_rsv, num_bytes); | ||
2005 | BUG_ON(ret); | ||
2006 | |||
2007 | #if 0 | ||
2008 | /* insert orphan item for the snapshot */ | ||
2009 | WARN_ON(!root->orphan_item_inserted); | ||
2010 | ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root, | ||
2011 | snap->root_key.objectid); | ||
2012 | BUG_ON(ret); | ||
2013 | snap->orphan_item_inserted = 1; | ||
2014 | #endif | ||
2015 | } | ||
2016 | |||
2017 | enum btrfs_orphan_cleanup_state { | 1938 | enum btrfs_orphan_cleanup_state { |
2018 | ORPHAN_CLEANUP_STARTED = 1, | 1939 | ORPHAN_CLEANUP_STARTED = 1, |
2019 | ORPHAN_CLEANUP_DONE = 2, | 1940 | ORPHAN_CLEANUP_DONE = 2, |
@@ -2099,9 +2020,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2099 | } | 2020 | } |
2100 | spin_unlock(&root->orphan_lock); | 2021 | spin_unlock(&root->orphan_lock); |
2101 | 2022 | ||
2102 | if (block_rsv) | ||
2103 | btrfs_add_durable_block_rsv(root->fs_info, block_rsv); | ||
2104 | |||
2105 | /* grab metadata reservation from transaction handle */ | 2023 | /* grab metadata reservation from transaction handle */ |
2106 | if (reserve) { | 2024 | if (reserve) { |
2107 | ret = btrfs_orphan_reserve_metadata(trans, inode); | 2025 | ret = btrfs_orphan_reserve_metadata(trans, inode); |
@@ -2168,6 +2086,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2168 | struct btrfs_key key, found_key; | 2086 | struct btrfs_key key, found_key; |
2169 | struct btrfs_trans_handle *trans; | 2087 | struct btrfs_trans_handle *trans; |
2170 | struct inode *inode; | 2088 | struct inode *inode; |
2089 | u64 last_objectid = 0; | ||
2171 | int ret = 0, nr_unlink = 0, nr_truncate = 0; | 2090 | int ret = 0, nr_unlink = 0, nr_truncate = 0; |
2172 | 2091 | ||
2173 | if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) | 2092 | if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) |
@@ -2219,41 +2138,49 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2219 | * crossing root thing. we store the inode number in the | 2138 | * crossing root thing. we store the inode number in the |
2220 | * offset of the orphan item. | 2139 | * offset of the orphan item. |
2221 | */ | 2140 | */ |
2141 | |||
2142 | if (found_key.offset == last_objectid) { | ||
2143 | printk(KERN_ERR "btrfs: Error removing orphan entry, " | ||
2144 | "stopping orphan cleanup\n"); | ||
2145 | ret = -EINVAL; | ||
2146 | goto out; | ||
2147 | } | ||
2148 | |||
2149 | last_objectid = found_key.offset; | ||
2150 | |||
2222 | found_key.objectid = found_key.offset; | 2151 | found_key.objectid = found_key.offset; |
2223 | found_key.type = BTRFS_INODE_ITEM_KEY; | 2152 | found_key.type = BTRFS_INODE_ITEM_KEY; |
2224 | found_key.offset = 0; | 2153 | found_key.offset = 0; |
2225 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); | 2154 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); |
2226 | if (IS_ERR(inode)) { | 2155 | ret = PTR_RET(inode); |
2227 | ret = PTR_ERR(inode); | 2156 | if (ret && ret != -ESTALE) |
2228 | goto out; | 2157 | goto out; |
2229 | } | ||
2230 | 2158 | ||
2231 | /* | 2159 | /* |
2232 | * add this inode to the orphan list so btrfs_orphan_del does | 2160 | * Inode is already gone but the orphan item is still there, |
2233 | * the proper thing when we hit it | 2161 | * kill the orphan item. |
2234 | */ | ||
2235 | spin_lock(&root->orphan_lock); | ||
2236 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | ||
2237 | spin_unlock(&root->orphan_lock); | ||
2238 | |||
2239 | /* | ||
2240 | * if this is a bad inode, means we actually succeeded in | ||
2241 | * removing the inode, but not the orphan record, which means | ||
2242 | * we need to manually delete the orphan since iput will just | ||
2243 | * do a destroy_inode | ||
2244 | */ | 2162 | */ |
2245 | if (is_bad_inode(inode)) { | 2163 | if (ret == -ESTALE) { |
2246 | trans = btrfs_start_transaction(root, 0); | 2164 | trans = btrfs_start_transaction(root, 1); |
2247 | if (IS_ERR(trans)) { | 2165 | if (IS_ERR(trans)) { |
2248 | ret = PTR_ERR(trans); | 2166 | ret = PTR_ERR(trans); |
2249 | goto out; | 2167 | goto out; |
2250 | } | 2168 | } |
2251 | btrfs_orphan_del(trans, inode); | 2169 | ret = btrfs_del_orphan_item(trans, root, |
2170 | found_key.objectid); | ||
2171 | BUG_ON(ret); | ||
2252 | btrfs_end_transaction(trans, root); | 2172 | btrfs_end_transaction(trans, root); |
2253 | iput(inode); | ||
2254 | continue; | 2173 | continue; |
2255 | } | 2174 | } |
2256 | 2175 | ||
2176 | /* | ||
2177 | * add this inode to the orphan list so btrfs_orphan_del does | ||
2178 | * the proper thing when we hit it | ||
2179 | */ | ||
2180 | spin_lock(&root->orphan_lock); | ||
2181 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | ||
2182 | spin_unlock(&root->orphan_lock); | ||
2183 | |||
2257 | /* if we have links, this was a truncate, lets do that */ | 2184 | /* if we have links, this was a truncate, lets do that */ |
2258 | if (inode->i_nlink) { | 2185 | if (inode->i_nlink) { |
2259 | if (!S_ISREG(inode->i_mode)) { | 2186 | if (!S_ISREG(inode->i_mode)) { |
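The hunk above folds the btrfs_iget() error check into a plain return code via PTR_RET() so that -ESTALE (the inode is already gone) can be special-cased instead of aborting the whole cleanup. A minimal sketch of the helper's behavior, assuming it matches the <linux/err.h> of this era:

    /* sketch: 0 for a valid pointer, the embedded errno otherwise */
    static inline int ptr_ret_sketch(const void *ptr)
    {
            if (IS_ERR(ptr))
                    return PTR_ERR(ptr);
            return 0;
    }

With that, ret == -ESTALE above means "inode evicted but orphan item left behind", which is exactly the case the new branch handles by deleting the orphan item directly and continuing.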
@@ -2687,7 +2614,16 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | |||
2687 | u64 ino = btrfs_ino(inode); | 2614 | u64 ino = btrfs_ino(inode); |
2688 | u64 dir_ino = btrfs_ino(dir); | 2615 | u64 dir_ino = btrfs_ino(dir); |
2689 | 2616 | ||
2690 | trans = btrfs_start_transaction(root, 10); | 2617 | /* |
2618 | * 1 for the possible orphan item | ||
2619 | * 1 for the dir item | ||
2620 | * 1 for the dir index | ||
2621 | * 1 for the inode ref | ||
2622 | * 1 for the inode ref in the tree log | ||
2623 | * 2 for the dir entries in the log | ||
2624 | * 1 for the inode | ||
2625 | */ | ||
2626 | trans = btrfs_start_transaction(root, 8); | ||
2691 | if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) | 2627 | if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) |
2692 | return trans; | 2628 | return trans; |
2693 | 2629 | ||
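The enumeration in the new comment is where the count of 8 comes from: one reservation unit per metadata item the unlink may dirty. A hedged sketch of the arithmetic (UNLINK_RESERVE_ITEMS is a hypothetical name used only for illustration):

    /* 1 orphan + 1 dir item + 1 dir index + 1 inode ref
     * + 1 logged inode ref + 2 logged dir entries + 1 inode = 8
     */
    enum { UNLINK_RESERVE_ITEMS = 1 + 1 + 1 + 1 + 1 + 2 + 1 };

    trans = btrfs_start_transaction(root, UNLINK_RESERVE_ITEMS);
    if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
            return trans;   /* reserved, or failed for a non-ENOSPC reason */
    /* -ENOSPC: fall through to the slow path below, which proves the
     * items already exist and reserves only the single orphan item */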
@@ -2710,7 +2646,8 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | |||
2710 | return ERR_PTR(-ENOMEM); | 2646 | return ERR_PTR(-ENOMEM); |
2711 | } | 2647 | } |
2712 | 2648 | ||
2713 | trans = btrfs_start_transaction(root, 0); | 2649 | /* 1 for the orphan item */ |
2650 | trans = btrfs_start_transaction(root, 1); | ||
2714 | if (IS_ERR(trans)) { | 2651 | if (IS_ERR(trans)) { |
2715 | btrfs_free_path(path); | 2652 | btrfs_free_path(path); |
2716 | root->fs_info->enospc_unlink = 0; | 2653 | root->fs_info->enospc_unlink = 0; |
@@ -2815,6 +2752,12 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | |||
2815 | err = 0; | 2752 | err = 0; |
2816 | out: | 2753 | out: |
2817 | btrfs_free_path(path); | 2754 | btrfs_free_path(path); |
2755 | /* Migrate the orphan reservation over */ | ||
2756 | if (!err) | ||
2757 | err = btrfs_block_rsv_migrate(trans->block_rsv, | ||
2758 | &root->fs_info->global_block_rsv, | ||
2759 | trans->bytes_reserved); | ||
2760 | |||
2818 | if (err) { | 2761 | if (err) { |
2819 | btrfs_end_transaction(trans, root); | 2762 | btrfs_end_transaction(trans, root); |
2820 | root->fs_info->enospc_unlink = 0; | 2763 | root->fs_info->enospc_unlink = 0; |
@@ -2829,6 +2772,9 @@ static void __unlink_end_trans(struct btrfs_trans_handle *trans, | |||
2829 | struct btrfs_root *root) | 2772 | struct btrfs_root *root) |
2830 | { | 2773 | { |
2831 | if (trans->block_rsv == &root->fs_info->global_block_rsv) { | 2774 | if (trans->block_rsv == &root->fs_info->global_block_rsv) { |
2775 | btrfs_block_rsv_release(root, trans->block_rsv, | ||
2776 | trans->bytes_reserved); | ||
2777 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
2832 | BUG_ON(!root->fs_info->enospc_unlink); | 2778 | BUG_ON(!root->fs_info->enospc_unlink); |
2833 | root->fs_info->enospc_unlink = 0; | 2779 | root->fs_info->enospc_unlink = 0; |
2834 | } | 2780 | } |
@@ -3220,6 +3166,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
3220 | pgoff_t index = from >> PAGE_CACHE_SHIFT; | 3166 | pgoff_t index = from >> PAGE_CACHE_SHIFT; |
3221 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | 3167 | unsigned offset = from & (PAGE_CACHE_SIZE-1); |
3222 | struct page *page; | 3168 | struct page *page; |
3169 | gfp_t mask = btrfs_alloc_write_mask(mapping); | ||
3223 | int ret = 0; | 3170 | int ret = 0; |
3224 | u64 page_start; | 3171 | u64 page_start; |
3225 | u64 page_end; | 3172 | u64 page_end; |
@@ -3232,7 +3179,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
3232 | 3179 | ||
3233 | ret = -ENOMEM; | 3180 | ret = -ENOMEM; |
3234 | again: | 3181 | again: |
3235 | page = find_or_create_page(mapping, index, GFP_NOFS); | 3182 | page = find_or_create_page(mapping, index, mask); |
3236 | if (!page) { | 3183 | if (!page) { |
3237 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | 3184 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
3238 | goto out; | 3185 | goto out; |
@@ -3465,6 +3412,8 @@ void btrfs_evict_inode(struct inode *inode) | |||
3465 | { | 3412 | { |
3466 | struct btrfs_trans_handle *trans; | 3413 | struct btrfs_trans_handle *trans; |
3467 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3414 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3415 | struct btrfs_block_rsv *rsv, *global_rsv; | ||
3416 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); | ||
3468 | unsigned long nr; | 3417 | unsigned long nr; |
3469 | int ret; | 3418 | int ret; |
3470 | 3419 | ||
@@ -3492,22 +3441,55 @@ void btrfs_evict_inode(struct inode *inode) | |||
3492 | goto no_delete; | 3441 | goto no_delete; |
3493 | } | 3442 | } |
3494 | 3443 | ||
3444 | rsv = btrfs_alloc_block_rsv(root); | ||
3445 | if (!rsv) { | ||
3446 | btrfs_orphan_del(NULL, inode); | ||
3447 | goto no_delete; | ||
3448 | } | ||
3449 | rsv->size = min_size; | ||
3450 | global_rsv = &root->fs_info->global_block_rsv; | ||
3451 | |||
3495 | btrfs_i_size_write(inode, 0); | 3452 | btrfs_i_size_write(inode, 0); |
3496 | 3453 | ||
3454 | /* | ||
3455 | * This is a bit simpler than btrfs_truncate since | ||
3456 | * | ||
3457 | * 1) We've already reserved our space for our orphan item in the | ||
3458 | * unlink. | ||
3459 | * 2) We're going to delete the inode item, so we don't need to update | ||
3460 | * it at all. | ||
3461 | * | ||
3462 | * So we just need to reserve some slack space in case we add bytes when | ||
3463 | * doing the truncate. | ||
3464 | */ | ||
3497 | while (1) { | 3465 | while (1) { |
3498 | trans = btrfs_join_transaction(root); | 3466 | ret = btrfs_block_rsv_refill(root, rsv, min_size); |
3499 | BUG_ON(IS_ERR(trans)); | 3467 | |
3500 | trans->block_rsv = root->orphan_block_rsv; | 3468 | /* |
3469 | * Try and steal from the global reserve since we will | ||
3470 | * likely not use this space anyway; we want to try as | ||
3471 | * hard as possible to get this to work. | ||
3472 | */ | ||
3473 | if (ret) | ||
3474 | ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size); | ||
3501 | 3475 | ||
3502 | ret = btrfs_block_rsv_check(trans, root, | ||
3503 | root->orphan_block_rsv, 0, 5); | ||
3504 | if (ret) { | 3476 | if (ret) { |
3505 | BUG_ON(ret != -EAGAIN); | 3477 | printk(KERN_WARNING "Could not get space for a " |
3506 | ret = btrfs_commit_transaction(trans, root); | 3478 | "delete, will truncate on mount %d\n", ret); |
3507 | BUG_ON(ret); | 3479 | btrfs_orphan_del(NULL, inode); |
3508 | continue; | 3480 | btrfs_free_block_rsv(root, rsv); |
3481 | goto no_delete; | ||
3482 | } | ||
3483 | |||
3484 | trans = btrfs_start_transaction(root, 0); | ||
3485 | if (IS_ERR(trans)) { | ||
3486 | btrfs_orphan_del(NULL, inode); | ||
3487 | btrfs_free_block_rsv(root, rsv); | ||
3488 | goto no_delete; | ||
3509 | } | 3489 | } |
3510 | 3490 | ||
3491 | trans->block_rsv = rsv; | ||
3492 | |||
3511 | ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); | 3493 | ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); |
3512 | if (ret != -EAGAIN) | 3494 | if (ret != -EAGAIN) |
3513 | break; | 3495 | break; |
@@ -3516,14 +3498,17 @@ void btrfs_evict_inode(struct inode *inode) | |||
3516 | btrfs_end_transaction(trans, root); | 3498 | btrfs_end_transaction(trans, root); |
3517 | trans = NULL; | 3499 | trans = NULL; |
3518 | btrfs_btree_balance_dirty(root, nr); | 3500 | btrfs_btree_balance_dirty(root, nr); |
3519 | |||
3520 | } | 3501 | } |
3521 | 3502 | ||
3503 | btrfs_free_block_rsv(root, rsv); | ||
3504 | |||
3522 | if (ret == 0) { | 3505 | if (ret == 0) { |
3506 | trans->block_rsv = root->orphan_block_rsv; | ||
3523 | ret = btrfs_orphan_del(trans, inode); | 3507 | ret = btrfs_orphan_del(trans, inode); |
3524 | BUG_ON(ret); | 3508 | BUG_ON(ret); |
3525 | } | 3509 | } |
3526 | 3510 | ||
3511 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
3527 | if (!(root == root->fs_info->tree_root || | 3512 | if (!(root == root->fs_info->tree_root || |
3528 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)) | 3513 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)) |
3529 | btrfs_return_ino(root, btrfs_ino(inode)); | 3514 | btrfs_return_ino(root, btrfs_ino(inode)); |
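The retry loop above replaces the old "commit until the orphan reserve drains" scheme with a small per-pass reservation. Condensed, the refill-or-steal step each iteration is (a sketch; min_size is one truncate item's worth of metadata):

    ret = btrfs_block_rsv_refill(root, rsv, min_size);     /* fresh space */
    if (ret)                                               /* none free:  */
            ret = btrfs_block_rsv_migrate(global_rsv, rsv, /* raid the    */
                                          min_size);       /* global rsv  */
    if (ret) {
            /* no space anywhere: keep the orphan item so the next mount
             * finishes the truncate, and bail out of the eviction */
    }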
@@ -5647,8 +5632,7 @@ again: | |||
5647 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { | 5632 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { |
5648 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); | 5633 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); |
5649 | if (!ret) | 5634 | if (!ret) |
5650 | ret = btrfs_update_inode(trans, root, inode); | 5635 | err = btrfs_update_inode(trans, root, inode); |
5651 | err = ret; | ||
5652 | goto out; | 5636 | goto out; |
5653 | } | 5637 | } |
5654 | 5638 | ||
@@ -6393,6 +6377,7 @@ static int btrfs_truncate(struct inode *inode) | |||
6393 | struct btrfs_trans_handle *trans; | 6377 | struct btrfs_trans_handle *trans; |
6394 | unsigned long nr; | 6378 | unsigned long nr; |
6395 | u64 mask = root->sectorsize - 1; | 6379 | u64 mask = root->sectorsize - 1; |
6380 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); | ||
6396 | 6381 | ||
6397 | ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); | 6382 | ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); |
6398 | if (ret) | 6383 | if (ret) |
@@ -6440,19 +6425,23 @@ static int btrfs_truncate(struct inode *inode) | |||
6440 | rsv = btrfs_alloc_block_rsv(root); | 6425 | rsv = btrfs_alloc_block_rsv(root); |
6441 | if (!rsv) | 6426 | if (!rsv) |
6442 | return -ENOMEM; | 6427 | return -ENOMEM; |
6443 | btrfs_add_durable_block_rsv(root->fs_info, rsv); | 6428 | rsv->size = min_size; |
6444 | 6429 | ||
6430 | /* | ||
6431 | * 1 for the truncate slack space | ||
6432 | * 1 for the orphan item we're going to add | ||
6433 | * 1 for the orphan item deletion | ||
6434 | * 1 for updating the inode. | ||
6435 | */ | ||
6445 | trans = btrfs_start_transaction(root, 4); | 6436 | trans = btrfs_start_transaction(root, 4); |
6446 | if (IS_ERR(trans)) { | 6437 | if (IS_ERR(trans)) { |
6447 | err = PTR_ERR(trans); | 6438 | err = PTR_ERR(trans); |
6448 | goto out; | 6439 | goto out; |
6449 | } | 6440 | } |
6450 | 6441 | ||
6451 | /* | 6442 | /* Migrate the slack space for the truncate to our reserve */ |
6452 | * Reserve space for the truncate process. Truncate should be adding | 6443 | ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv, |
6453 | * space, but if there are snapshots it may end up using space. | 6444 | min_size); |
6454 | */ | ||
6455 | ret = btrfs_truncate_reserve_metadata(trans, root, rsv); | ||
6456 | BUG_ON(ret); | 6445 | BUG_ON(ret); |
6457 | 6446 | ||
6458 | ret = btrfs_orphan_add(trans, inode); | 6447 | ret = btrfs_orphan_add(trans, inode); |
@@ -6461,21 +6450,6 @@ static int btrfs_truncate(struct inode *inode) | |||
6461 | goto out; | 6450 | goto out; |
6462 | } | 6451 | } |
6463 | 6452 | ||
6464 | nr = trans->blocks_used; | ||
6465 | btrfs_end_transaction(trans, root); | ||
6466 | btrfs_btree_balance_dirty(root, nr); | ||
6467 | |||
6468 | /* | ||
6469 | * Ok so we've already migrated our bytes over for the truncate, so here | ||
6470 | * just reserve the one slot we need for updating the inode. | ||
6471 | */ | ||
6472 | trans = btrfs_start_transaction(root, 1); | ||
6473 | if (IS_ERR(trans)) { | ||
6474 | err = PTR_ERR(trans); | ||
6475 | goto out; | ||
6476 | } | ||
6477 | trans->block_rsv = rsv; | ||
6478 | |||
6479 | /* | 6453 | /* |
6480 | * setattr is responsible for setting the ordered_data_close flag, | 6454 | * setattr is responsible for setting the ordered_data_close flag, |
6481 | * but that is only tested during the last file release. That | 6455 | * but that is only tested during the last file release. That |
@@ -6497,20 +6471,30 @@ static int btrfs_truncate(struct inode *inode) | |||
6497 | btrfs_add_ordered_operation(trans, root, inode); | 6471 | btrfs_add_ordered_operation(trans, root, inode); |
6498 | 6472 | ||
6499 | while (1) { | 6473 | while (1) { |
6474 | ret = btrfs_block_rsv_refill(root, rsv, min_size); | ||
6475 | if (ret) { | ||
6476 | /* | ||
6477 | * This can only happen with the original transaction we | ||
6478 | * started above, every other time we shouldn't have a | ||
6479 | * transaction started yet. | ||
6480 | */ | ||
6481 | if (ret == -EAGAIN) | ||
6482 | goto end_trans; | ||
6483 | err = ret; | ||
6484 | break; | ||
6485 | } | ||
6486 | |||
6500 | if (!trans) { | 6487 | if (!trans) { |
6501 | trans = btrfs_start_transaction(root, 3); | 6488 | /* Just need the 1 for updating the inode */ |
6489 | trans = btrfs_start_transaction(root, 1); | ||
6502 | if (IS_ERR(trans)) { | 6490 | if (IS_ERR(trans)) { |
6503 | err = PTR_ERR(trans); | 6491 | err = PTR_ERR(trans); |
6504 | goto out; | 6492 | goto out; |
6505 | } | 6493 | } |
6506 | |||
6507 | ret = btrfs_truncate_reserve_metadata(trans, root, | ||
6508 | rsv); | ||
6509 | BUG_ON(ret); | ||
6510 | |||
6511 | trans->block_rsv = rsv; | ||
6512 | } | 6494 | } |
6513 | 6495 | ||
6496 | trans->block_rsv = rsv; | ||
6497 | |||
6514 | ret = btrfs_truncate_inode_items(trans, root, inode, | 6498 | ret = btrfs_truncate_inode_items(trans, root, inode, |
6515 | inode->i_size, | 6499 | inode->i_size, |
6516 | BTRFS_EXTENT_DATA_KEY); | 6500 | BTRFS_EXTENT_DATA_KEY); |
@@ -6525,7 +6509,7 @@ static int btrfs_truncate(struct inode *inode) | |||
6525 | err = ret; | 6509 | err = ret; |
6526 | break; | 6510 | break; |
6527 | } | 6511 | } |
6528 | 6512 | end_trans: | |
6529 | nr = trans->blocks_used; | 6513 | nr = trans->blocks_used; |
6530 | btrfs_end_transaction(trans, root); | 6514 | btrfs_end_transaction(trans, root); |
6531 | trans = NULL; | 6515 | trans = NULL; |
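Taken together, the truncate hunks turn the old "reserve inside the loop" scheme into one reusable rsv sized to a single item and refilled each pass. The shape of the rewritten flow, sketched with the helpers this diff uses:

    trans = btrfs_start_transaction(root, 4); /* slack + orphan add/del + inode */
    btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv, min_size);

    while (1) {
            ret = btrfs_block_rsv_refill(root, rsv, min_size);
            if (ret == -EAGAIN)        /* only the first pass can hit this */
                    goto end_trans;    /* drop the 4-unit trans, then retry */
            if (ret)
                    break;             /* genuinely out of metadata space */
            if (!trans)                /* later passes: 1 unit for the inode */
                    trans = btrfs_start_transaction(root, 1);
            trans->block_rsv = rsv;
            ret = btrfs_truncate_inode_items(trans, root, inode,
                                             inode->i_size,
                                             BTRFS_EXTENT_DATA_KEY);
            if (ret != -EAGAIN)
                    break;             /* finished, or a real error */
    end_trans:
            btrfs_end_transaction(trans, root);
            trans = NULL;              /* balance dirty pages, then loop */
    }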
@@ -6607,9 +6591,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
6607 | ei->last_sub_trans = 0; | 6591 | ei->last_sub_trans = 0; |
6608 | ei->logged_trans = 0; | 6592 | ei->logged_trans = 0; |
6609 | ei->delalloc_bytes = 0; | 6593 | ei->delalloc_bytes = 0; |
6610 | ei->reserved_bytes = 0; | ||
6611 | ei->disk_i_size = 0; | 6594 | ei->disk_i_size = 0; |
6612 | ei->flags = 0; | 6595 | ei->flags = 0; |
6596 | ei->csum_bytes = 0; | ||
6613 | ei->index_cnt = (u64)-1; | 6597 | ei->index_cnt = (u64)-1; |
6614 | ei->last_unlink_trans = 0; | 6598 | ei->last_unlink_trans = 0; |
6615 | 6599 | ||
@@ -6655,6 +6639,8 @@ void btrfs_destroy_inode(struct inode *inode) | |||
6655 | WARN_ON(inode->i_data.nrpages); | 6639 | WARN_ON(inode->i_data.nrpages); |
6656 | WARN_ON(BTRFS_I(inode)->outstanding_extents); | 6640 | WARN_ON(BTRFS_I(inode)->outstanding_extents); |
6657 | WARN_ON(BTRFS_I(inode)->reserved_extents); | 6641 | WARN_ON(BTRFS_I(inode)->reserved_extents); |
6642 | WARN_ON(BTRFS_I(inode)->delalloc_bytes); | ||
6643 | WARN_ON(BTRFS_I(inode)->csum_bytes); | ||
6658 | 6644 | ||
6659 | /* | 6645 | /* |
6660 | * This can happen where we create an inode, but somebody else also | 6646 | * This can happen where we create an inode, but somebody else also |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7f57efa76d11..cc9893990341 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -118,7 +118,7 @@ void btrfs_update_iflags(struct inode *inode) | |||
118 | /* | 118 | /* |
119 | * Inherit flags from the parent inode. | 119 | * Inherit flags from the parent inode. |
120 | * | 120 | * |
121 | * Unlike extN we don't have any flags we don't want to inherit currently. | 121 | * Currently only the compression flags and the cow flags are inherited. |
122 | */ | 122 | */ |
123 | void btrfs_inherit_iflags(struct inode *inode, struct inode *dir) | 123 | void btrfs_inherit_iflags(struct inode *inode, struct inode *dir) |
124 | { | 124 | { |
@@ -129,12 +129,17 @@ void btrfs_inherit_iflags(struct inode *inode, struct inode *dir) | |||
129 | 129 | ||
130 | flags = BTRFS_I(dir)->flags; | 130 | flags = BTRFS_I(dir)->flags; |
131 | 131 | ||
132 | if (S_ISREG(inode->i_mode)) | 132 | if (flags & BTRFS_INODE_NOCOMPRESS) { |
133 | flags &= ~BTRFS_INODE_DIRSYNC; | 133 | BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS; |
134 | else if (!S_ISDIR(inode->i_mode)) | 134 | BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; |
135 | flags &= (BTRFS_INODE_NODUMP | BTRFS_INODE_NOATIME); | 135 | } else if (flags & BTRFS_INODE_COMPRESS) { |
136 | BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS; | ||
137 | BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS; | ||
138 | } | ||
139 | |||
140 | if (flags & BTRFS_INODE_NODATACOW) | ||
141 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; | ||
136 | 142 | ||
137 | BTRFS_I(inode)->flags = flags; | ||
138 | btrfs_update_iflags(inode); | 143 | btrfs_update_iflags(inode); |
139 | } | 144 | } |
140 | 145 | ||
@@ -278,6 +283,7 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) | |||
278 | struct fstrim_range range; | 283 | struct fstrim_range range; |
279 | u64 minlen = ULLONG_MAX; | 284 | u64 minlen = ULLONG_MAX; |
280 | u64 num_devices = 0; | 285 | u64 num_devices = 0; |
286 | u64 total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy); | ||
281 | int ret; | 287 | int ret; |
282 | 288 | ||
283 | if (!capable(CAP_SYS_ADMIN)) | 289 | if (!capable(CAP_SYS_ADMIN)) |
@@ -296,12 +302,15 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) | |||
296 | } | 302 | } |
297 | } | 303 | } |
298 | rcu_read_unlock(); | 304 | rcu_read_unlock(); |
305 | |||
299 | if (!num_devices) | 306 | if (!num_devices) |
300 | return -EOPNOTSUPP; | 307 | return -EOPNOTSUPP; |
301 | |||
302 | if (copy_from_user(&range, arg, sizeof(range))) | 308 | if (copy_from_user(&range, arg, sizeof(range))) |
303 | return -EFAULT; | 309 | return -EFAULT; |
310 | if (range.start > total_bytes) | ||
311 | return -EINVAL; | ||
304 | 312 | ||
313 | range.len = min(range.len, total_bytes - range.start); | ||
305 | range.minlen = max(range.minlen, minlen); | 314 | range.minlen = max(range.minlen, minlen); |
306 | ret = btrfs_trim_fs(root, &range); | 315 | ret = btrfs_trim_fs(root, &range); |
307 | if (ret < 0) | 316 | if (ret < 0) |
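The two added checks bound the user-supplied trim range by the filesystem size before btrfs_trim_fs() runs. A worked example of the clamp (numbers are illustrative only):

    /* total_bytes = 100 GiB, range.start = 90 GiB, range.len = 20 GiB */
    if (range.start > total_bytes)                 /* 90 <= 100: accepted */
            return -EINVAL;
    range.len = min(range.len, total_bytes - range.start);  /* -> 10 GiB */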
@@ -761,7 +770,7 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, | |||
761 | int ret = 1; | 770 | int ret = 1; |
762 | 771 | ||
763 | /* | 772 | /* |
764 | * make sure that once we start defragging and extent, we keep on | 773 | * make sure that once we start defragging an extent, we keep on |
765 | * defragging it | 774 | * defragging it |
766 | */ | 775 | */ |
767 | if (start < *defrag_end) | 776 | if (start < *defrag_end) |
@@ -806,7 +815,6 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, | |||
806 | * extent will force at least part of that big extent to be defragged. | 815 | * extent will force at least part of that big extent to be defragged. |
807 | */ | 816 | */ |
808 | if (ret) { | 817 | if (ret) { |
809 | *last_len += len; | ||
810 | *defrag_end = extent_map_end(em); | 818 | *defrag_end = extent_map_end(em); |
811 | } else { | 819 | } else { |
812 | *last_len = 0; | 820 | *last_len = 0; |
@@ -844,6 +852,7 @@ static int cluster_pages_for_defrag(struct inode *inode, | |||
844 | int i_done; | 852 | int i_done; |
845 | struct btrfs_ordered_extent *ordered; | 853 | struct btrfs_ordered_extent *ordered; |
846 | struct extent_state *cached_state = NULL; | 854 | struct extent_state *cached_state = NULL; |
855 | gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); | ||
847 | 856 | ||
848 | if (isize == 0) | 857 | if (isize == 0) |
849 | return 0; | 858 | return 0; |
@@ -861,7 +870,7 @@ again: | |||
861 | for (i = 0; i < num_pages; i++) { | 870 | for (i = 0; i < num_pages; i++) { |
862 | struct page *page; | 871 | struct page *page; |
863 | page = find_or_create_page(inode->i_mapping, | 872 | page = find_or_create_page(inode->i_mapping, |
864 | start_index + i, GFP_NOFS); | 873 | start_index + i, mask); |
865 | if (!page) | 874 | if (!page) |
866 | break; | 875 | break; |
867 | 876 | ||
@@ -973,18 +982,20 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
973 | struct btrfs_super_block *disk_super; | 982 | struct btrfs_super_block *disk_super; |
974 | struct file_ra_state *ra = NULL; | 983 | struct file_ra_state *ra = NULL; |
975 | unsigned long last_index; | 984 | unsigned long last_index; |
985 | u64 isize = i_size_read(inode); | ||
976 | u64 features; | 986 | u64 features; |
977 | u64 last_len = 0; | 987 | u64 last_len = 0; |
978 | u64 skip = 0; | 988 | u64 skip = 0; |
979 | u64 defrag_end = 0; | 989 | u64 defrag_end = 0; |
980 | u64 newer_off = range->start; | 990 | u64 newer_off = range->start; |
981 | int newer_left = 0; | ||
982 | unsigned long i; | 991 | unsigned long i; |
992 | unsigned long ra_index = 0; | ||
983 | int ret; | 993 | int ret; |
984 | int defrag_count = 0; | 994 | int defrag_count = 0; |
985 | int compress_type = BTRFS_COMPRESS_ZLIB; | 995 | int compress_type = BTRFS_COMPRESS_ZLIB; |
986 | int extent_thresh = range->extent_thresh; | 996 | int extent_thresh = range->extent_thresh; |
987 | int newer_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; | 997 | int max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; |
998 | int cluster = max_cluster; | ||
988 | u64 new_align = ~((u64)128 * 1024 - 1); | 999 | u64 new_align = ~((u64)128 * 1024 - 1); |
989 | struct page **pages = NULL; | 1000 | struct page **pages = NULL; |
990 | 1001 | ||
@@ -998,7 +1009,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
998 | compress_type = range->compress_type; | 1009 | compress_type = range->compress_type; |
999 | } | 1010 | } |
1000 | 1011 | ||
1001 | if (inode->i_size == 0) | 1012 | if (isize == 0) |
1002 | return 0; | 1013 | return 0; |
1003 | 1014 | ||
1004 | /* | 1015 | /* |
@@ -1014,7 +1025,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
1014 | ra = &file->f_ra; | 1025 | ra = &file->f_ra; |
1015 | } | 1026 | } |
1016 | 1027 | ||
1017 | pages = kmalloc(sizeof(struct page *) * newer_cluster, | 1028 | pages = kmalloc(sizeof(struct page *) * max_cluster, |
1018 | GFP_NOFS); | 1029 | GFP_NOFS); |
1019 | if (!pages) { | 1030 | if (!pages) { |
1020 | ret = -ENOMEM; | 1031 | ret = -ENOMEM; |
@@ -1023,10 +1034,10 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
1023 | 1034 | ||
1024 | /* find the last page to defrag */ | 1035 | /* find the last page to defrag */ |
1025 | if (range->start + range->len > range->start) { | 1036 | if (range->start + range->len > range->start) { |
1026 | last_index = min_t(u64, inode->i_size - 1, | 1037 | last_index = min_t(u64, isize - 1, |
1027 | range->start + range->len - 1) >> PAGE_CACHE_SHIFT; | 1038 | range->start + range->len - 1) >> PAGE_CACHE_SHIFT; |
1028 | } else { | 1039 | } else { |
1029 | last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; | 1040 | last_index = (isize - 1) >> PAGE_CACHE_SHIFT; |
1030 | } | 1041 | } |
1031 | 1042 | ||
1032 | if (newer_than) { | 1043 | if (newer_than) { |
@@ -1039,16 +1050,24 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
1039 | * the extents in the file evenly spaced | 1050 | * the extents in the file evenly spaced |
1040 | */ | 1051 | */ |
1041 | i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; | 1052 | i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; |
1042 | newer_left = newer_cluster; | ||
1043 | } else | 1053 | } else |
1044 | goto out_ra; | 1054 | goto out_ra; |
1045 | } else { | 1055 | } else { |
1046 | i = range->start >> PAGE_CACHE_SHIFT; | 1056 | i = range->start >> PAGE_CACHE_SHIFT; |
1047 | } | 1057 | } |
1048 | if (!max_to_defrag) | 1058 | if (!max_to_defrag) |
1049 | max_to_defrag = last_index - 1; | 1059 | max_to_defrag = last_index; |
1060 | |||
1061 | /* | ||
1062 | * make writeback start from i, so the defrag range can be | ||
1063 | * written sequentially. | ||
1064 | */ | ||
1065 | if (i < inode->i_mapping->writeback_index) | ||
1066 | inode->i_mapping->writeback_index = i; | ||
1050 | 1067 | ||
1051 | while (i <= last_index && defrag_count < max_to_defrag) { | 1068 | while (i <= last_index && defrag_count < max_to_defrag && |
1069 | (i < (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> | ||
1070 | PAGE_CACHE_SHIFT)) { | ||
1052 | /* | 1071 | /* |
1053 | * make sure we stop running if someone unmounts | 1072 | * make sure we stop running if someone unmounts |
1054 | * the FS | 1073 | * the FS |
@@ -1071,18 +1090,31 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
1071 | i = max(i + 1, next); | 1090 | i = max(i + 1, next); |
1072 | continue; | 1091 | continue; |
1073 | } | 1092 | } |
1093 | |||
1094 | if (!newer_than) { | ||
1095 | cluster = (PAGE_CACHE_ALIGN(defrag_end) >> | ||
1096 | PAGE_CACHE_SHIFT) - i; | ||
1097 | cluster = min(cluster, max_cluster); | ||
1098 | } else { | ||
1099 | cluster = max_cluster; | ||
1100 | } | ||
1101 | |||
1074 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) | 1102 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) |
1075 | BTRFS_I(inode)->force_compress = compress_type; | 1103 | BTRFS_I(inode)->force_compress = compress_type; |
1076 | 1104 | ||
1077 | btrfs_force_ra(inode->i_mapping, ra, file, i, newer_cluster); | 1105 | if (i + cluster > ra_index) { |
1106 | ra_index = max(i, ra_index); | ||
1107 | btrfs_force_ra(inode->i_mapping, ra, file, ra_index, | ||
1108 | cluster); | ||
1109 | ra_index += max_cluster; | ||
1110 | } | ||
1078 | 1111 | ||
1079 | ret = cluster_pages_for_defrag(inode, pages, i, newer_cluster); | 1112 | ret = cluster_pages_for_defrag(inode, pages, i, cluster); |
1080 | if (ret < 0) | 1113 | if (ret < 0) |
1081 | goto out_ra; | 1114 | goto out_ra; |
1082 | 1115 | ||
1083 | defrag_count += ret; | 1116 | defrag_count += ret; |
1084 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret); | 1117 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret); |
1085 | i += ret; | ||
1086 | 1118 | ||
1087 | if (newer_than) { | 1119 | if (newer_than) { |
1088 | if (newer_off == (u64)-1) | 1120 | if (newer_off == (u64)-1) |
@@ -1097,12 +1129,17 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
1097 | if (!ret) { | 1129 | if (!ret) { |
1098 | range->start = newer_off; | 1130 | range->start = newer_off; |
1099 | i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; | 1131 | i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; |
1100 | newer_left = newer_cluster; | ||
1101 | } else { | 1132 | } else { |
1102 | break; | 1133 | break; |
1103 | } | 1134 | } |
1104 | } else { | 1135 | } else { |
1105 | i++; | 1136 | if (ret > 0) { |
1137 | i += ret; | ||
1138 | last_len += ret << PAGE_CACHE_SHIFT; | ||
1139 | } else { | ||
1140 | i++; | ||
1141 | last_len = 0; | ||
1142 | } | ||
1106 | } | 1143 | } |
1107 | } | 1144 | } |
1108 | 1145 | ||
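With the new cluster sizing, a non-incremental defrag pass reads ahead only to the end of the extent should_defrag_range() chose, instead of a fixed max_cluster. A worked example (4 KiB pages, so max_cluster = 256K >> 12 = 64 pages):

    /* defrag_end = 200 KiB -> PAGE_CACHE_ALIGN(defrag_end) >> shift = 50.
     * With i = 10: cluster = min(50 - 10, 64) = 40 pages, i.e. exactly up
     * to the end of the current extent and no further.
     */
    cluster = (PAGE_CACHE_ALIGN(defrag_end) >> PAGE_CACHE_SHIFT) - i;
    cluster = min(cluster, max_cluster);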
@@ -1128,16 +1165,14 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
1128 | mutex_unlock(&inode->i_mutex); | 1165 | mutex_unlock(&inode->i_mutex); |
1129 | } | 1166 | } |
1130 | 1167 | ||
1131 | disk_super = &root->fs_info->super_copy; | 1168 | disk_super = root->fs_info->super_copy; |
1132 | features = btrfs_super_incompat_flags(disk_super); | 1169 | features = btrfs_super_incompat_flags(disk_super); |
1133 | if (range->compress_type == BTRFS_COMPRESS_LZO) { | 1170 | if (range->compress_type == BTRFS_COMPRESS_LZO) { |
1134 | features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; | 1171 | features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; |
1135 | btrfs_set_super_incompat_flags(disk_super, features); | 1172 | btrfs_set_super_incompat_flags(disk_super, features); |
1136 | } | 1173 | } |
1137 | 1174 | ||
1138 | if (!file) | 1175 | ret = defrag_count; |
1139 | kfree(ra); | ||
1140 | return defrag_count; | ||
1141 | 1176 | ||
1142 | out_ra: | 1177 | out_ra: |
1143 | if (!file) | 1178 | if (!file) |
@@ -2579,7 +2614,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) | |||
2579 | return PTR_ERR(trans); | 2614 | return PTR_ERR(trans); |
2580 | } | 2615 | } |
2581 | 2616 | ||
2582 | dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); | 2617 | dir_id = btrfs_super_root_dir(root->fs_info->super_copy); |
2583 | di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path, | 2618 | di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path, |
2584 | dir_id, "default", 7, 1); | 2619 | dir_id, "default", 7, 1); |
2585 | if (IS_ERR_OR_NULL(di)) { | 2620 | if (IS_ERR_OR_NULL(di)) { |
@@ -2595,7 +2630,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) | |||
2595 | btrfs_mark_buffer_dirty(path->nodes[0]); | 2630 | btrfs_mark_buffer_dirty(path->nodes[0]); |
2596 | btrfs_free_path(path); | 2631 | btrfs_free_path(path); |
2597 | 2632 | ||
2598 | disk_super = &root->fs_info->super_copy; | 2633 | disk_super = root->fs_info->super_copy; |
2599 | features = btrfs_super_incompat_flags(disk_super); | 2634 | features = btrfs_super_incompat_flags(disk_super); |
2600 | if (!(features & BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)) { | 2635 | if (!(features & BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)) { |
2601 | features |= BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL; | 2636 | features |= BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL; |
@@ -2862,7 +2897,7 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) | |||
2862 | int i; | 2897 | int i; |
2863 | unsigned long rel_ptr; | 2898 | unsigned long rel_ptr; |
2864 | int size; | 2899 | int size; |
2865 | struct btrfs_ioctl_ino_path_args *ipa; | 2900 | struct btrfs_ioctl_ino_path_args *ipa = NULL; |
2866 | struct inode_fs_paths *ipath = NULL; | 2901 | struct inode_fs_paths *ipath = NULL; |
2867 | struct btrfs_path *path; | 2902 | struct btrfs_path *path; |
2868 | 2903 | ||
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index fb2605d998e9..f38e452486b8 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c | |||
@@ -158,8 +158,7 @@ static void print_extent_ref_v0(struct extent_buffer *eb, int slot) | |||
158 | void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) | 158 | void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) |
159 | { | 159 | { |
160 | int i; | 160 | int i; |
161 | u32 type; | 161 | u32 type, nr; |
162 | u32 nr = btrfs_header_nritems(l); | ||
163 | struct btrfs_item *item; | 162 | struct btrfs_item *item; |
164 | struct btrfs_root_item *ri; | 163 | struct btrfs_root_item *ri; |
165 | struct btrfs_dir_item *di; | 164 | struct btrfs_dir_item *di; |
@@ -172,6 +171,11 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) | |||
172 | struct btrfs_key key; | 171 | struct btrfs_key key; |
173 | struct btrfs_key found_key; | 172 | struct btrfs_key found_key; |
174 | 173 | ||
174 | if (!l) | ||
175 | return; | ||
176 | |||
177 | nr = btrfs_header_nritems(l); | ||
178 | |||
175 | printk(KERN_INFO "leaf %llu total ptrs %d free space %d\n", | 179 | printk(KERN_INFO "leaf %llu total ptrs %d free space %d\n", |
176 | (unsigned long long)btrfs_header_bytenr(l), nr, | 180 | (unsigned long long)btrfs_header_bytenr(l), nr, |
177 | btrfs_leaf_free_space(root, l)); | 181 | btrfs_leaf_free_space(root, l)); |
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c new file mode 100644 index 000000000000..cd857119ba8a --- /dev/null +++ b/fs/btrfs/reada.c | |||
@@ -0,0 +1,949 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2011 STRATO. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #include <linux/sched.h> | ||
20 | #include <linux/pagemap.h> | ||
21 | #include <linux/writeback.h> | ||
22 | #include <linux/blkdev.h> | ||
23 | #include <linux/rbtree.h> | ||
24 | #include <linux/slab.h> | ||
25 | #include <linux/workqueue.h> | ||
26 | #include "ctree.h" | ||
27 | #include "volumes.h" | ||
28 | #include "disk-io.h" | ||
29 | #include "transaction.h" | ||
30 | |||
31 | #undef DEBUG | ||
32 | |||
33 | /* | ||
34 | * This is the implementation for the generic read ahead framework. | ||
35 | * | ||
36 | * To trigger a readahead, btrfs_reada_add must be called. It will start | ||
37 | * a read ahead for the given range [start, end) on tree root. The returned | ||
38 | * handle can either be used to wait on the readahead to finish | ||
39 | * (btrfs_reada_wait), or to send it to the background (btrfs_reada_detach). | ||
40 | * | ||
41 | * The read ahead works as follows: | ||
42 | * On btrfs_reada_add, the root of the tree is inserted into a radix_tree. | ||
43 | * reada_start_machine will then search for extents to prefetch and trigger | ||
44 | * some reads. When a read finishes for a node, all contained node/leaf | ||
45 | * pointers that lie in the given range will also be enqueued. The reads will | ||
46 | * be triggered in sequential order, thus giving a big win over a naive | ||
47 | * enumeration. It will also make use of multi-device layouts. Each disk | ||
48 | * will have its own read pointer and all disks will be utilized in parallel. | ||
49 | * Also, no two disks will read both sides of a mirror simultaneously, as this | ||
50 | * would waste seeking capacity. Instead both disks will read different parts | ||
51 | * of the filesystem. | ||
52 | * Any number of readaheads can be started in parallel. The read order will be | ||
53 | * determined globally, i.e. 2 parallel readaheads will normally finish faster | ||
54 | * than the same 2 started one after another. | ||
55 | */ | ||
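A hedged sketch of a caller, following the description above; the key-range signatures are an assumption based on struct reada_control's key_start/key_end rather than on a declaration visible in this hunk:

    /* prefetch an entire tree, then wait for the readahead to drain */
    static int prefetch_tree_sketch(struct btrfs_root *root)
    {
            struct btrfs_key start = { .objectid = 0, .type = 0, .offset = 0 };
            struct btrfs_key end = { .objectid = (u64)-1, .type = (u8)-1,
                                     .offset = (u64)-1 };
            struct reada_control *rc;

            rc = btrfs_reada_add(root, &start, &end);
            if (IS_ERR(rc))
                    return PTR_ERR(rc);
            return btrfs_reada_wait(rc);    /* or btrfs_reada_detach(rc) */
    }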
56 | |||
57 | #define MAX_MIRRORS 2 | ||
58 | #define MAX_IN_FLIGHT 6 | ||
59 | |||
60 | struct reada_extctl { | ||
61 | struct list_head list; | ||
62 | struct reada_control *rc; | ||
63 | u64 generation; | ||
64 | }; | ||
65 | |||
66 | struct reada_extent { | ||
67 | u64 logical; | ||
68 | struct btrfs_key top; | ||
69 | u32 blocksize; | ||
70 | int err; | ||
71 | struct list_head extctl; | ||
72 | struct kref refcnt; | ||
73 | spinlock_t lock; | ||
74 | struct reada_zone *zones[MAX_MIRRORS]; | ||
75 | int nzones; | ||
76 | struct btrfs_device *scheduled_for; | ||
77 | }; | ||
78 | |||
79 | struct reada_zone { | ||
80 | u64 start; | ||
81 | u64 end; | ||
82 | u64 elems; | ||
83 | struct list_head list; | ||
84 | spinlock_t lock; | ||
85 | int locked; | ||
86 | struct btrfs_device *device; | ||
87 | struct btrfs_device *devs[MAX_MIRRORS]; /* full list, incl self */ | ||
88 | int ndevs; | ||
89 | struct kref refcnt; | ||
90 | }; | ||
91 | |||
92 | struct reada_machine_work { | ||
93 | struct btrfs_work work; | ||
94 | struct btrfs_fs_info *fs_info; | ||
95 | }; | ||
96 | |||
97 | static void reada_extent_put(struct btrfs_fs_info *, struct reada_extent *); | ||
98 | static void reada_control_release(struct kref *kref); | ||
99 | static void reada_zone_release(struct kref *kref); | ||
100 | static void reada_start_machine(struct btrfs_fs_info *fs_info); | ||
101 | static void __reada_start_machine(struct btrfs_fs_info *fs_info); | ||
102 | |||
103 | static int reada_add_block(struct reada_control *rc, u64 logical, | ||
104 | struct btrfs_key *top, int level, u64 generation); | ||
105 | |||
106 | /* recurses */ | ||
107 | /* in case of err, eb might be NULL */ | ||
108 | static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, | ||
109 | u64 start, int err) | ||
110 | { | ||
111 | int level = 0; | ||
112 | int nritems; | ||
113 | int i; | ||
114 | u64 bytenr; | ||
115 | u64 generation; | ||
116 | struct reada_extent *re; | ||
117 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
118 | struct list_head list; | ||
119 | unsigned long index = start >> PAGE_CACHE_SHIFT; | ||
120 | struct btrfs_device *for_dev; | ||
121 | |||
122 | if (eb) | ||
123 | level = btrfs_header_level(eb); | ||
124 | |||
125 | /* find extent */ | ||
126 | spin_lock(&fs_info->reada_lock); | ||
127 | re = radix_tree_lookup(&fs_info->reada_tree, index); | ||
128 | if (re) | ||
129 | kref_get(&re->refcnt); | ||
130 | spin_unlock(&fs_info->reada_lock); | ||
131 | |||
132 | if (!re) | ||
133 | return -1; | ||
134 | |||
135 | spin_lock(&re->lock); | ||
136 | /* | ||
137 | * just take the full list from the extent. afterwards we | ||
138 | * don't need the lock anymore | ||
139 | */ | ||
140 | list_replace_init(&re->extctl, &list); | ||
141 | for_dev = re->scheduled_for; | ||
142 | re->scheduled_for = NULL; | ||
143 | spin_unlock(&re->lock); | ||
144 | |||
145 | if (err == 0) { | ||
146 | nritems = level ? btrfs_header_nritems(eb) : 0; | ||
147 | generation = btrfs_header_generation(eb); | ||
148 | /* | ||
149 | * FIXME: currently we just set nritems to 0 if this is a leaf, | ||
150 | * effectively ignoring the content. In a next step we could | ||
151 | * trigger more readahead depending on the content, e.g. | ||
152 | * fetch the checksums for the extents in the leaf. | ||
153 | */ | ||
154 | } else { | ||
155 | /* | ||
156 | * this is the error case, the extent buffer has not been | ||
157 | * read correctly. We won't access anything from it and | ||
158 | * just cleanup our data structures. Effectively this will | ||
159 | * cut the branch below this node from read ahead. | ||
160 | */ | ||
161 | nritems = 0; | ||
162 | generation = 0; | ||
163 | } | ||
164 | |||
165 | for (i = 0; i < nritems; i++) { | ||
166 | struct reada_extctl *rec; | ||
167 | u64 n_gen; | ||
168 | struct btrfs_key key; | ||
169 | struct btrfs_key next_key; | ||
170 | |||
171 | btrfs_node_key_to_cpu(eb, &key, i); | ||
172 | if (i + 1 < nritems) | ||
173 | btrfs_node_key_to_cpu(eb, &next_key, i + 1); | ||
174 | else | ||
175 | next_key = re->top; | ||
176 | bytenr = btrfs_node_blockptr(eb, i); | ||
177 | n_gen = btrfs_node_ptr_generation(eb, i); | ||
178 | |||
179 | list_for_each_entry(rec, &list, list) { | ||
180 | struct reada_control *rc = rec->rc; | ||
181 | |||
182 | /* | ||
183 | * if the generation doesn't match, just ignore this | ||
184 | * extctl. This will probably cut off a branch from | ||
185 | * prefetch. Alternatively one could start a new (sub-) | ||
186 | * prefetch for this branch, starting again from root. | ||
187 | * FIXME: move the generation check out of this loop | ||
188 | */ | ||
189 | #ifdef DEBUG | ||
190 | if (rec->generation != generation) { | ||
191 | printk(KERN_DEBUG "generation mismatch for " | ||
192 | "(%llu,%d,%llu) %llu != %llu\n", | ||
193 | key.objectid, key.type, key.offset, | ||
194 | rec->generation, generation); | ||
195 | } | ||
196 | #endif | ||
197 | if (rec->generation == generation && | ||
198 | btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 && | ||
199 | btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0) | ||
200 | reada_add_block(rc, bytenr, &next_key, | ||
201 | level - 1, n_gen); | ||
202 | } | ||
203 | } | ||
204 | /* | ||
205 | * free extctl records | ||
206 | */ | ||
207 | while (!list_empty(&list)) { | ||
208 | struct reada_control *rc; | ||
209 | struct reada_extctl *rec; | ||
210 | |||
211 | rec = list_first_entry(&list, struct reada_extctl, list); | ||
212 | list_del(&rec->list); | ||
213 | rc = rec->rc; | ||
214 | kfree(rec); | ||
215 | |||
216 | kref_get(&rc->refcnt); | ||
217 | if (atomic_dec_and_test(&rc->elems)) { | ||
218 | kref_put(&rc->refcnt, reada_control_release); | ||
219 | wake_up(&rc->wait); | ||
220 | } | ||
221 | kref_put(&rc->refcnt, reada_control_release); | ||
222 | |||
223 | reada_extent_put(fs_info, re); /* one ref for each entry */ | ||
224 | } | ||
225 | reada_extent_put(fs_info, re); /* our ref */ | ||
226 | if (for_dev) | ||
227 | atomic_dec(&for_dev->reada_in_flight); | ||
228 | |||
229 | return 0; | ||
230 | } | ||
231 | |||
232 | /* | ||
233 | * start is passed separately in case eb is NULL, which may be the case with | ||
234 | * failed I/O | ||
235 | */ | ||
236 | int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, | ||
237 | u64 start, int err) | ||
238 | { | ||
239 | int ret; | ||
240 | |||
241 | ret = __readahead_hook(root, eb, start, err); | ||
242 | |||
243 | reada_start_machine(root->fs_info); | ||
244 | |||
245 | return ret; | ||
246 | } | ||
247 | |||
248 | static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, | ||
249 | struct btrfs_device *dev, u64 logical, | ||
250 | struct btrfs_bio *multi) | ||
251 | { | ||
252 | int ret; | ||
253 | int looped = 0; | ||
254 | struct reada_zone *zone; | ||
255 | struct btrfs_block_group_cache *cache = NULL; | ||
256 | u64 start; | ||
257 | u64 end; | ||
258 | int i; | ||
259 | |||
260 | again: | ||
261 | zone = NULL; | ||
262 | spin_lock(&fs_info->reada_lock); | ||
263 | ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, | ||
264 | logical >> PAGE_CACHE_SHIFT, 1); | ||
265 | if (ret == 1) | ||
266 | kref_get(&zone->refcnt); | ||
267 | spin_unlock(&fs_info->reada_lock); | ||
268 | |||
269 | if (ret == 1) { | ||
270 | if (logical >= zone->start && logical < zone->end) | ||
271 | return zone; | ||
272 | spin_lock(&fs_info->reada_lock); | ||
273 | kref_put(&zone->refcnt, reada_zone_release); | ||
274 | spin_unlock(&fs_info->reada_lock); | ||
275 | } | ||
276 | |||
277 | if (looped) | ||
278 | return NULL; | ||
279 | |||
280 | cache = btrfs_lookup_block_group(fs_info, logical); | ||
281 | if (!cache) | ||
282 | return NULL; | ||
283 | |||
284 | start = cache->key.objectid; | ||
285 | end = start + cache->key.offset - 1; | ||
286 | btrfs_put_block_group(cache); | ||
287 | |||
288 | zone = kzalloc(sizeof(*zone), GFP_NOFS); | ||
289 | if (!zone) | ||
290 | return NULL; | ||
291 | |||
292 | zone->start = start; | ||
293 | zone->end = end; | ||
294 | INIT_LIST_HEAD(&zone->list); | ||
295 | spin_lock_init(&zone->lock); | ||
296 | zone->locked = 0; | ||
297 | kref_init(&zone->refcnt); | ||
298 | zone->elems = 0; | ||
299 | zone->device = dev; /* our device always sits at index 0 */ | ||
300 | for (i = 0; i < multi->num_stripes; ++i) { | ||
301 | /* bounds have already been checked */ | ||
302 | zone->devs[i] = multi->stripes[i].dev; | ||
303 | } | ||
304 | zone->ndevs = multi->num_stripes; | ||
305 | |||
306 | spin_lock(&fs_info->reada_lock); | ||
307 | ret = radix_tree_insert(&dev->reada_zones, | ||
308 | (unsigned long)zone->end >> PAGE_CACHE_SHIFT, | ||
309 | zone); | ||
310 | spin_unlock(&fs_info->reada_lock); | ||
311 | |||
312 | if (ret) { | ||
313 | kfree(zone); | ||
314 | looped = 1; | ||
315 | goto again; | ||
316 | } | ||
317 | |||
318 | return zone; | ||
319 | } | ||
320 | |||
321 | static struct reada_extent *reada_find_extent(struct btrfs_root *root, | ||
322 | u64 logical, | ||
323 | struct btrfs_key *top, int level) | ||
324 | { | ||
325 | int ret; | ||
326 | int looped = 0; | ||
327 | struct reada_extent *re = NULL; | ||
328 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
329 | struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; | ||
330 | struct btrfs_bio *multi = NULL; | ||
331 | struct btrfs_device *dev; | ||
332 | u32 blocksize; | ||
333 | u64 length; | ||
334 | int nzones = 0; | ||
335 | int i; | ||
336 | unsigned long index = logical >> PAGE_CACHE_SHIFT; | ||
337 | |||
338 | again: | ||
339 | spin_lock(&fs_info->reada_lock); | ||
340 | re = radix_tree_lookup(&fs_info->reada_tree, index); | ||
341 | if (re) | ||
342 | kref_get(&re->refcnt); | ||
343 | spin_unlock(&fs_info->reada_lock); | ||
344 | |||
345 | if (re || looped) | ||
346 | return re; | ||
347 | |||
348 | re = kzalloc(sizeof(*re), GFP_NOFS); | ||
349 | if (!re) | ||
350 | return NULL; | ||
351 | |||
352 | blocksize = btrfs_level_size(root, level); | ||
353 | re->logical = logical; | ||
354 | re->blocksize = blocksize; | ||
355 | re->top = *top; | ||
356 | INIT_LIST_HEAD(&re->extctl); | ||
357 | spin_lock_init(&re->lock); | ||
358 | kref_init(&re->refcnt); | ||
359 | |||
360 | /* | ||
361 | * map block | ||
362 | */ | ||
363 | length = blocksize; | ||
364 | ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length, &multi, 0); | ||
365 | if (ret || !multi || length < blocksize) | ||
366 | goto error; | ||
367 | |||
368 | if (multi->num_stripes > MAX_MIRRORS) { | ||
369 | printk(KERN_ERR "btrfs readahead: more than %d copies not " | ||
370 | "supported", MAX_MIRRORS); | ||
371 | goto error; | ||
372 | } | ||
373 | |||
374 | for (nzones = 0; nzones < multi->num_stripes; ++nzones) { | ||
375 | struct reada_zone *zone; | ||
376 | |||
377 | dev = multi->stripes[nzones].dev; | ||
378 | zone = reada_find_zone(fs_info, dev, logical, multi); | ||
379 | if (!zone) | ||
380 | break; | ||
381 | |||
382 | re->zones[nzones] = zone; | ||
383 | spin_lock(&zone->lock); | ||
384 | if (!zone->elems) | ||
385 | kref_get(&zone->refcnt); | ||
386 | ++zone->elems; | ||
387 | spin_unlock(&zone->lock); | ||
388 | spin_lock(&fs_info->reada_lock); | ||
389 | kref_put(&zone->refcnt, reada_zone_release); | ||
390 | spin_unlock(&fs_info->reada_lock); | ||
391 | } | ||
392 | re->nzones = nzones; | ||
393 | if (nzones == 0) { | ||
394 | /* not a single zone found, error out */ | ||
395 | goto error; | ||
396 | } | ||
397 | |||
398 | /* insert extent in reada_tree + all per-device trees, all or nothing */ | ||
399 | spin_lock(&fs_info->reada_lock); | ||
400 | ret = radix_tree_insert(&fs_info->reada_tree, index, re); | ||
401 | if (ret) { | ||
402 | spin_unlock(&fs_info->reada_lock); | ||
403 | if (ret != -ENOMEM) { | ||
404 | /* someone inserted the extent in the meantime */ | ||
405 | looped = 1; | ||
406 | } | ||
407 | goto error; | ||
408 | } | ||
409 | for (i = 0; i < nzones; ++i) { | ||
410 | dev = multi->stripes[i].dev; | ||
411 | ret = radix_tree_insert(&dev->reada_extents, index, re); | ||
412 | if (ret) { | ||
413 | while (--i >= 0) { | ||
414 | dev = multi->stripes[i].dev; | ||
415 | BUG_ON(dev == NULL); | ||
416 | radix_tree_delete(&dev->reada_extents, index); | ||
417 | } | ||
418 | BUG_ON(fs_info == NULL); | ||
419 | radix_tree_delete(&fs_info->reada_tree, index); | ||
420 | spin_unlock(&fs_info->reada_lock); | ||
421 | goto error; | ||
422 | } | ||
423 | } | ||
424 | spin_unlock(&fs_info->reada_lock); | ||
425 | |||
426 | return re; | ||
427 | |||
428 | error: | ||
429 | while (nzones) { | ||
430 | struct reada_zone *zone; | ||
431 | |||
432 | --nzones; | ||
433 | zone = re->zones[nzones]; | ||
434 | kref_get(&zone->refcnt); | ||
435 | spin_lock(&zone->lock); | ||
436 | --zone->elems; | ||
437 | if (zone->elems == 0) { | ||
438 | /* | ||
439 | * no fs_info->reada_lock needed, as this can't be | ||
440 | * the last ref | ||
441 | */ | ||
442 | kref_put(&zone->refcnt, reada_zone_release); | ||
443 | } | ||
444 | spin_unlock(&zone->lock); | ||
445 | |||
446 | spin_lock(&fs_info->reada_lock); | ||
447 | kref_put(&zone->refcnt, reada_zone_release); | ||
448 | spin_unlock(&fs_info->reada_lock); | ||
449 | } | ||
450 | kfree(re); | ||
451 | if (looped) | ||
452 | goto again; | ||
453 | return NULL; | ||
454 | } | ||
455 | |||
456 | static void reada_kref_dummy(struct kref *kr) | ||
457 | { | ||
458 | } | ||
459 | |||
460 | static void reada_extent_put(struct btrfs_fs_info *fs_info, | ||
461 | struct reada_extent *re) | ||
462 | { | ||
463 | int i; | ||
464 | unsigned long index = re->logical >> PAGE_CACHE_SHIFT; | ||
465 | |||
466 | spin_lock(&fs_info->reada_lock); | ||
467 | if (!kref_put(&re->refcnt, reada_kref_dummy)) { | ||
468 | spin_unlock(&fs_info->reada_lock); | ||
469 | return; | ||
470 | } | ||
471 | |||
472 | radix_tree_delete(&fs_info->reada_tree, index); | ||
473 | for (i = 0; i < re->nzones; ++i) { | ||
474 | struct reada_zone *zone = re->zones[i]; | ||
475 | |||
476 | radix_tree_delete(&zone->device->reada_extents, index); | ||
477 | } | ||
478 | |||
479 | spin_unlock(&fs_info->reada_lock); | ||
480 | |||
481 | for (i = 0; i < re->nzones; ++i) { | ||
482 | struct reada_zone *zone = re->zones[i]; | ||
483 | |||
484 | kref_get(&zone->refcnt); | ||
485 | spin_lock(&zone->lock); | ||
486 | --zone->elems; | ||
487 | if (zone->elems == 0) { | ||
488 | /* no fs_info->reada_lock needed, as this can't be | ||
489 | * the last ref */ | ||
490 | kref_put(&zone->refcnt, reada_zone_release); | ||
491 | } | ||
492 | spin_unlock(&zone->lock); | ||
493 | |||
494 | spin_lock(&fs_info->reada_lock); | ||
495 | kref_put(&zone->refcnt, reada_zone_release); | ||
496 | spin_unlock(&fs_info->reada_lock); | ||
497 | } | ||
498 | if (re->scheduled_for) | ||
499 | atomic_dec(&re->scheduled_for->reada_in_flight); | ||
500 | |||
501 | kfree(re); | ||
502 | } | ||
503 | |||
504 | static void reada_zone_release(struct kref *kref) | ||
505 | { | ||
506 | struct reada_zone *zone = container_of(kref, struct reada_zone, refcnt); | ||
507 | |||
508 | radix_tree_delete(&zone->device->reada_zones, | ||
509 | zone->end >> PAGE_CACHE_SHIFT); | ||
510 | |||
511 | kfree(zone); | ||
512 | } | ||
513 | |||
514 | static void reada_control_release(struct kref *kref) | ||
515 | { | ||
516 | struct reada_control *rc = container_of(kref, struct reada_control, | ||
517 | refcnt); | ||
518 | |||
519 | kfree(rc); | ||
520 | } | ||
521 | |||
522 | static int reada_add_block(struct reada_control *rc, u64 logical, | ||
523 | struct btrfs_key *top, int level, u64 generation) | ||
524 | { | ||
525 | struct btrfs_root *root = rc->root; | ||
526 | struct reada_extent *re; | ||
527 | struct reada_extctl *rec; | ||
528 | |||
529 | re = reada_find_extent(root, logical, top, level); /* takes one ref */ | ||
530 | if (!re) | ||
531 | return -1; | ||
532 | |||
533 | rec = kzalloc(sizeof(*rec), GFP_NOFS); | ||
534 | if (!rec) { | ||
535 | reada_extent_put(root->fs_info, re); | ||
536 | return -1; | ||
537 | } | ||
538 | |||
539 | rec->rc = rc; | ||
540 | rec->generation = generation; | ||
541 | atomic_inc(&rc->elems); | ||
542 | |||
543 | spin_lock(&re->lock); | ||
544 | list_add_tail(&rec->list, &re->extctl); | ||
545 | spin_unlock(&re->lock); | ||
546 | |||
547 | /* leave the ref on the extent */ | ||
548 | |||
549 | return 0; | ||
550 | } | ||
551 | |||
552 | /* | ||
553 | * called with fs_info->reada_lock held | ||
554 | */ | ||
555 | static void reada_peer_zones_set_lock(struct reada_zone *zone, int lock) | ||
556 | { | ||
557 | int i; | ||
558 | unsigned long index = zone->end >> PAGE_CACHE_SHIFT; | ||
559 | |||
560 | for (i = 0; i < zone->ndevs; ++i) { | ||
561 | struct reada_zone *peer; | ||
562 | peer = radix_tree_lookup(&zone->devs[i]->reada_zones, index); | ||
563 | if (peer && peer->device != zone->device) | ||
564 | peer->locked = lock; | ||
565 | } | ||
566 | } | ||
567 | |||
568 | /* | ||
569 | * called with fs_info->reada_lock held | ||
570 | */ | ||
571 | static int reada_pick_zone(struct btrfs_device *dev) | ||
572 | { | ||
573 | struct reada_zone *top_zone = NULL; | ||
574 | struct reada_zone *top_locked_zone = NULL; | ||
575 | u64 top_elems = 0; | ||
576 | u64 top_locked_elems = 0; | ||
577 | unsigned long index = 0; | ||
578 | int ret; | ||
579 | |||
580 | if (dev->reada_curr_zone) { | ||
581 | reada_peer_zones_set_lock(dev->reada_curr_zone, 0); | ||
582 | kref_put(&dev->reada_curr_zone->refcnt, reada_zone_release); | ||
583 | dev->reada_curr_zone = NULL; | ||
584 | } | ||
585 | /* pick the zone with the most elements */ | ||
586 | while (1) { | ||
587 | struct reada_zone *zone; | ||
588 | |||
589 | ret = radix_tree_gang_lookup(&dev->reada_zones, | ||
590 | (void **)&zone, index, 1); | ||
591 | if (ret == 0) | ||
592 | break; | ||
593 | index = (zone->end >> PAGE_CACHE_SHIFT) + 1; | ||
594 | if (zone->locked) { | ||
595 | if (zone->elems > top_locked_elems) { | ||
596 | top_locked_elems = zone->elems; | ||
597 | top_locked_zone = zone; | ||
598 | } | ||
599 | } else { | ||
600 | if (zone->elems > top_elems) { | ||
601 | top_elems = zone->elems; | ||
602 | top_zone = zone; | ||
603 | } | ||
604 | } | ||
605 | } | ||
606 | if (top_zone) | ||
607 | dev->reada_curr_zone = top_zone; | ||
608 | else if (top_locked_zone) | ||
609 | dev->reada_curr_zone = top_locked_zone; | ||
610 | else | ||
611 | return 0; | ||
612 | |||
613 | dev->reada_next = dev->reada_curr_zone->start; | ||
614 | kref_get(&dev->reada_curr_zone->refcnt); | ||
615 | reada_peer_zones_set_lock(dev->reada_curr_zone, 1); | ||
616 | |||
617 | return 1; | ||
618 | } | ||
619 | |||
620 | static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, | ||
621 | struct btrfs_device *dev) | ||
622 | { | ||
623 | struct reada_extent *re = NULL; | ||
624 | int mirror_num = 0; | ||
625 | struct extent_buffer *eb = NULL; | ||
626 | u64 logical; | ||
627 | u32 blocksize; | ||
628 | int ret; | ||
629 | int i; | ||
630 | int need_kick = 0; | ||
631 | |||
632 | spin_lock(&fs_info->reada_lock); | ||
633 | if (dev->reada_curr_zone == NULL) { | ||
634 | ret = reada_pick_zone(dev); | ||
635 | if (!ret) { | ||
636 | spin_unlock(&fs_info->reada_lock); | ||
637 | return 0; | ||
638 | } | ||
639 | } | ||
640 | /* | ||
641 | * FIXME currently we issue the reads one extent at a time. If we have | ||
642 | * a contiguous block of extents, we could also coalesce them or use | ||
643 | * plugging to speed things up | ||
644 | */ | ||
645 | ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re, | ||
646 | dev->reada_next >> PAGE_CACHE_SHIFT, 1); | ||
647 | if (ret == 0 || re->logical >= dev->reada_curr_zone->end) { | ||
648 | ret = reada_pick_zone(dev); | ||
649 | if (!ret) { | ||
650 | spin_unlock(&fs_info->reada_lock); | ||
651 | return 0; | ||
652 | } | ||
653 | re = NULL; | ||
654 | ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re, | ||
655 | dev->reada_next >> PAGE_CACHE_SHIFT, 1); | ||
656 | } | ||
657 | if (ret == 0) { | ||
658 | spin_unlock(&fs_info->reada_lock); | ||
659 | return 0; | ||
660 | } | ||
661 | dev->reada_next = re->logical + re->blocksize; | ||
662 | kref_get(&re->refcnt); | ||
663 | |||
664 | spin_unlock(&fs_info->reada_lock); | ||
665 | |||
666 | /* | ||
667 | * find mirror num | ||
668 | */ | ||
669 | for (i = 0; i < re->nzones; ++i) { | ||
670 | if (re->zones[i]->device == dev) { | ||
671 | mirror_num = i + 1; | ||
672 | break; | ||
673 | } | ||
674 | } | ||
675 | logical = re->logical; | ||
676 | blocksize = re->blocksize; | ||
677 | |||
678 | spin_lock(&re->lock); | ||
679 | if (re->scheduled_for == NULL) { | ||
680 | re->scheduled_for = dev; | ||
681 | need_kick = 1; | ||
682 | } | ||
683 | spin_unlock(&re->lock); | ||
684 | |||
685 | reada_extent_put(fs_info, re); | ||
686 | |||
687 | if (!need_kick) | ||
688 | return 0; | ||
689 | |||
690 | atomic_inc(&dev->reada_in_flight); | ||
691 | ret = reada_tree_block_flagged(fs_info->extent_root, logical, blocksize, | ||
692 | mirror_num, &eb); | ||
693 | if (ret) | ||
694 | __readahead_hook(fs_info->extent_root, NULL, logical, ret); | ||
695 | else if (eb) | ||
696 | __readahead_hook(fs_info->extent_root, eb, eb->start, ret); | ||
697 | |||
698 | if (eb) | ||
699 | free_extent_buffer(eb); | ||
700 | |||
701 | return 1; | ||
702 | |||
703 | } | ||
704 | |||
705 | static void reada_start_machine_worker(struct btrfs_work *work) | ||
706 | { | ||
707 | struct reada_machine_work *rmw; | ||
708 | struct btrfs_fs_info *fs_info; | ||
709 | |||
710 | rmw = container_of(work, struct reada_machine_work, work); | ||
711 | fs_info = rmw->fs_info; | ||
712 | |||
713 | kfree(rmw); | ||
714 | |||
715 | __reada_start_machine(fs_info); | ||
716 | } | ||
717 | |||
718 | static void __reada_start_machine(struct btrfs_fs_info *fs_info) | ||
719 | { | ||
720 | struct btrfs_device *device; | ||
721 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | ||
722 | u64 enqueued; | ||
723 | u64 total = 0; | ||
724 | int i; | ||
725 | |||
726 | do { | ||
727 | enqueued = 0; | ||
728 | list_for_each_entry(device, &fs_devices->devices, dev_list) { | ||
729 | if (atomic_read(&device->reada_in_flight) < | ||
730 | MAX_IN_FLIGHT) | ||
731 | enqueued += reada_start_machine_dev(fs_info, | ||
732 | device); | ||
733 | } | ||
734 | total += enqueued; | ||
735 | } while (enqueued && total < 10000); | ||
736 | |||
737 | if (enqueued == 0) | ||
738 | return; | ||
739 | |||
740 | /* | ||
741 | * If everything is already in the cache, this is effectively single | ||
742 | * threaded. To a) not hold the caller for too long and b) to utilize | ||
743 | * more cores, we break out of the loop above once 10000 reads have | ||
744 | * been enqueued and hand the remainder to worker threads, which | ||
745 | * distributes the load across the cores. | ||
746 | */ | ||
747 | for (i = 0; i < 2; ++i) | ||
748 | reada_start_machine(fs_info); | ||
749 | } | ||
750 | |||
751 | static void reada_start_machine(struct btrfs_fs_info *fs_info) | ||
752 | { | ||
753 | struct reada_machine_work *rmw; | ||
754 | |||
755 | rmw = kzalloc(sizeof(*rmw), GFP_NOFS); | ||
756 | if (!rmw) { | ||
757 | /* FIXME we cannot handle this properly right now */ | ||
758 | BUG(); | ||
759 | } | ||
760 | rmw->work.func = reada_start_machine_worker; | ||
761 | rmw->fs_info = fs_info; | ||
762 | |||
763 | btrfs_queue_worker(&fs_info->readahead_workers, &rmw->work); | ||
764 | } | ||
765 | |||
766 | #ifdef DEBUG | ||
767 | static void dump_devs(struct btrfs_fs_info *fs_info, int all) | ||
768 | { | ||
769 | struct btrfs_device *device; | ||
770 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | ||
771 | unsigned long index; | ||
772 | int ret; | ||
773 | int i; | ||
774 | int j; | ||
775 | int cnt; | ||
776 | |||
777 | spin_lock(&fs_info->reada_lock); | ||
778 | list_for_each_entry(device, &fs_devices->devices, dev_list) { | ||
779 | printk(KERN_DEBUG "dev %lld has %d in flight\n", device->devid, | ||
780 | atomic_read(&device->reada_in_flight)); | ||
781 | index = 0; | ||
782 | while (1) { | ||
783 | struct reada_zone *zone; | ||
784 | ret = radix_tree_gang_lookup(&device->reada_zones, | ||
785 | (void **)&zone, index, 1); | ||
786 | if (ret == 0) | ||
787 | break; | ||
788 | printk(KERN_DEBUG " zone %llu-%llu elems %llu locked " | ||
789 | "%d devs", zone->start, zone->end, zone->elems, | ||
790 | zone->locked); | ||
791 | for (j = 0; j < zone->ndevs; ++j) { | ||
792 | printk(KERN_CONT " %lld", | ||
793 | zone->devs[j]->devid); | ||
794 | } | ||
795 | if (device->reada_curr_zone == zone) | ||
796 | printk(KERN_CONT " curr off %llu", | ||
797 | device->reada_next - zone->start); | ||
798 | printk(KERN_CONT "\n"); | ||
799 | index = (zone->end >> PAGE_CACHE_SHIFT) + 1; | ||
800 | } | ||
801 | cnt = 0; | ||
802 | index = 0; | ||
803 | while (all) { | ||
804 | struct reada_extent *re = NULL; | ||
805 | |||
806 | ret = radix_tree_gang_lookup(&device->reada_extents, | ||
807 | (void **)&re, index, 1); | ||
808 | if (ret == 0) | ||
809 | break; | ||
810 | printk(KERN_DEBUG | ||
811 | " re: logical %llu size %u empty %d for %lld", | ||
812 | re->logical, re->blocksize, | ||
813 | list_empty(&re->extctl), re->scheduled_for ? | ||
814 | re->scheduled_for->devid : -1); | ||
815 | |||
816 | for (i = 0; i < re->nzones; ++i) { | ||
817 | printk(KERN_CONT " zone %llu-%llu devs", | ||
818 | re->zones[i]->start, | ||
819 | re->zones[i]->end); | ||
820 | for (j = 0; j < re->zones[i]->ndevs; ++j) { | ||
821 | printk(KERN_CONT " %lld", | ||
822 | re->zones[i]->devs[j]->devid); | ||
823 | } | ||
824 | } | ||
825 | printk(KERN_CONT "\n"); | ||
826 | index = (re->logical >> PAGE_CACHE_SHIFT) + 1; | ||
827 | if (++cnt > 15) | ||
828 | break; | ||
829 | } | ||
830 | } | ||
831 | |||
832 | index = 0; | ||
833 | cnt = 0; | ||
834 | while (all) { | ||
835 | struct reada_extent *re = NULL; | ||
836 | |||
837 | ret = radix_tree_gang_lookup(&fs_info->reada_tree, (void **)&re, | ||
838 | index, 1); | ||
839 | if (ret == 0) | ||
840 | break; | ||
841 | if (!re->scheduled_for) { | ||
842 | index = (re->logical >> PAGE_CACHE_SHIFT) + 1; | ||
843 | continue; | ||
844 | } | ||
845 | printk(KERN_DEBUG | ||
846 | "re: logical %llu size %u list empty %d for %lld", | ||
847 | re->logical, re->blocksize, list_empty(&re->extctl), | ||
848 | re->scheduled_for ? re->scheduled_for->devid : -1); | ||
849 | for (i = 0; i < re->nzones; ++i) { | ||
850 | printk(KERN_CONT " zone %llu-%llu devs", | ||
851 | re->zones[i]->start, | ||
852 | re->zones[i]->end); | ||
853 | for (j = 0; j < re->zones[i]->ndevs; ++j) { | ||
854 | printk(KERN_CONT " %lld", | ||
855 | re->zones[i]->devs[j]->devid); | ||
856 | } | ||
857 | } | ||
863 | printk(KERN_CONT "\n"); | ||
864 | index = (re->logical >> PAGE_CACHE_SHIFT) + 1; | ||
865 | } | ||
866 | spin_unlock(&fs_info->reada_lock); | ||
867 | } | ||
868 | #endif | ||
869 | |||
870 | /* | ||
871 | * interface | ||
872 | */ | ||
873 | struct reada_control *btrfs_reada_add(struct btrfs_root *root, | ||
874 | struct btrfs_key *key_start, struct btrfs_key *key_end) | ||
875 | { | ||
876 | struct reada_control *rc; | ||
877 | u64 start; | ||
878 | u64 generation; | ||
879 | int level; | ||
880 | struct extent_buffer *node; | ||
881 | static struct btrfs_key max_key = { | ||
882 | .objectid = (u64)-1, | ||
883 | .type = (u8)-1, | ||
884 | .offset = (u64)-1 | ||
885 | }; | ||
886 | |||
887 | rc = kzalloc(sizeof(*rc), GFP_NOFS); | ||
888 | if (!rc) | ||
889 | return ERR_PTR(-ENOMEM); | ||
890 | |||
891 | rc->root = root; | ||
892 | rc->key_start = *key_start; | ||
893 | rc->key_end = *key_end; | ||
894 | atomic_set(&rc->elems, 0); | ||
895 | init_waitqueue_head(&rc->wait); | ||
896 | kref_init(&rc->refcnt); | ||
897 | kref_get(&rc->refcnt); /* one ref for having elements */ | ||
898 | |||
899 | node = btrfs_root_node(root); | ||
900 | start = node->start; | ||
901 | level = btrfs_header_level(node); | ||
902 | generation = btrfs_header_generation(node); | ||
903 | free_extent_buffer(node); | ||
904 | |||
905 | reada_add_block(rc, start, &max_key, level, generation); | ||
906 | |||
907 | reada_start_machine(root->fs_info); | ||
908 | |||
909 | return rc; | ||
910 | } | ||
911 | |||
912 | #ifdef DEBUG | ||
913 | int btrfs_reada_wait(void *handle) | ||
914 | { | ||
915 | struct reada_control *rc = handle; | ||
916 | |||
917 | while (atomic_read(&rc->elems)) { | ||
918 | wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0, | ||
919 | 5 * HZ); | ||
920 | dump_devs(rc->root->fs_info, atomic_read(&rc->elems) < 10 ? 1 : 0); | ||
921 | } | ||
922 | |||
923 | dump_devs(rc->root->fs_info, atomic_read(&rc->elems) < 10 ? 1 : 0); | ||
924 | |||
925 | kref_put(&rc->refcnt, reada_control_release); | ||
926 | |||
927 | return 0; | ||
928 | } | ||
929 | #else | ||
930 | int btrfs_reada_wait(void *handle) | ||
931 | { | ||
932 | struct reada_control *rc = handle; | ||
933 | |||
934 | while (atomic_read(&rc->elems)) { | ||
935 | wait_event(rc->wait, atomic_read(&rc->elems) == 0); | ||
936 | } | ||
937 | |||
938 | kref_put(&rc->refcnt, reada_control_release); | ||
939 | |||
940 | return 0; | ||
941 | } | ||
942 | #endif | ||
943 | |||
944 | void btrfs_reada_detach(void *handle) | ||
945 | { | ||
946 | struct reada_control *rc = handle; | ||
947 | |||
948 | kref_put(&rc->refcnt, reada_control_release); | ||
949 | } | ||
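The new readahead machinery above is driven entirely through btrfs_reada_add(), btrfs_reada_wait() and btrfs_reada_detach(); the scrub conversion later in this patch is its first user. A minimal caller-side sketch (the key range here is an illustrative placeholder, not a value from the patch):

	/*
	 * Sketch of a readahead caller, modeled on the scrub.c hunk below.
	 * The key range is an assumption for illustration only.
	 */
	static void reada_whole_extent_tree(struct btrfs_root *extent_root)
	{
		struct reada_control *rc;
		struct btrfs_key key_start = {
			.objectid = 0,
			.type = BTRFS_EXTENT_ITEM_KEY,
			.offset = 0,
		};
		struct btrfs_key key_end = {
			.objectid = (u64)-1,
			.type = BTRFS_EXTENT_ITEM_KEY,
			.offset = (u64)-1,
		};

		rc = btrfs_reada_add(extent_root, &key_start, &key_end);
		if (IS_ERR(rc))
			return;

		/* block until all requested extents are read (or errored) */
		btrfs_reada_wait(rc);

		/*
		 * Alternatively, btrfs_reada_detach(rc) drops our reference
		 * and lets the readahead finish in the background.
		 */
	}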
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 59bb1764273d..24d654ce7a06 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -2041,8 +2041,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
2041 | BUG_ON(IS_ERR(trans)); | 2041 | BUG_ON(IS_ERR(trans)); |
2042 | trans->block_rsv = rc->block_rsv; | 2042 | trans->block_rsv = rc->block_rsv; |
2043 | 2043 | ||
2044 | ret = btrfs_block_rsv_check(trans, root, rc->block_rsv, | 2044 | ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved); |
2045 | min_reserved, 0); | ||
2046 | if (ret) { | 2045 | if (ret) { |
2047 | BUG_ON(ret != -EAGAIN); | 2046 | BUG_ON(ret != -EAGAIN); |
2048 | ret = btrfs_commit_transaction(trans, root); | 2047 | ret = btrfs_commit_transaction(trans, root); |
@@ -2152,8 +2151,7 @@ int prepare_to_merge(struct reloc_control *rc, int err) | |||
2152 | again: | 2151 | again: |
2153 | if (!err) { | 2152 | if (!err) { |
2154 | num_bytes = rc->merging_rsv_size; | 2153 | num_bytes = rc->merging_rsv_size; |
2155 | ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv, | 2154 | ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes); |
2156 | num_bytes); | ||
2157 | if (ret) | 2155 | if (ret) |
2158 | err = ret; | 2156 | err = ret; |
2159 | } | 2157 | } |
@@ -2427,7 +2425,7 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans, | |||
2427 | num_bytes = calcu_metadata_size(rc, node, 1) * 2; | 2425 | num_bytes = calcu_metadata_size(rc, node, 1) * 2; |
2428 | 2426 | ||
2429 | trans->block_rsv = rc->block_rsv; | 2427 | trans->block_rsv = rc->block_rsv; |
2430 | ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes); | 2428 | ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes); |
2431 | if (ret) { | 2429 | if (ret) { |
2432 | if (ret == -EAGAIN) | 2430 | if (ret == -EAGAIN) |
2433 | rc->commit_transaction = 1; | 2431 | rc->commit_transaction = 1; |
@@ -2922,6 +2920,7 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2922 | unsigned long last_index; | 2920 | unsigned long last_index; |
2923 | struct page *page; | 2921 | struct page *page; |
2924 | struct file_ra_state *ra; | 2922 | struct file_ra_state *ra; |
2923 | gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); | ||
2925 | int nr = 0; | 2924 | int nr = 0; |
2926 | int ret = 0; | 2925 | int ret = 0; |
2927 | 2926 | ||
@@ -2956,7 +2955,7 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2956 | ra, NULL, index, | 2955 | ra, NULL, index, |
2957 | last_index + 1 - index); | 2956 | last_index + 1 - index); |
2958 | page = find_or_create_page(inode->i_mapping, index, | 2957 | page = find_or_create_page(inode->i_mapping, index, |
2959 | GFP_NOFS); | 2958 | mask); |
2960 | if (!page) { | 2959 | if (!page) { |
2961 | btrfs_delalloc_release_metadata(inode, | 2960 | btrfs_delalloc_release_metadata(inode, |
2962 | PAGE_CACHE_SIZE); | 2961 | PAGE_CACHE_SIZE); |
@@ -3323,8 +3322,11 @@ static int find_data_references(struct reloc_control *rc, | |||
3323 | } | 3322 | } |
3324 | 3323 | ||
3325 | key.objectid = ref_objectid; | 3324 | key.objectid = ref_objectid; |
3326 | key.offset = ref_offset; | ||
3327 | key.type = BTRFS_EXTENT_DATA_KEY; | 3325 | key.type = BTRFS_EXTENT_DATA_KEY; |
3326 | if (ref_offset > ((u64)-1 << 32)) | ||
3327 | key.offset = 0; | ||
3328 | else | ||
3329 | key.offset = ref_offset; | ||
3328 | 3330 | ||
3329 | path->search_commit_root = 1; | 3331 | path->search_commit_root = 1; |
3330 | path->skip_locking = 1; | 3332 | path->skip_locking = 1; |
@@ -3645,14 +3647,11 @@ int prepare_to_relocate(struct reloc_control *rc) | |||
3645 | * btrfs_init_reloc_root will use them when there | 3647 | * btrfs_init_reloc_root will use them when there |
3646 | * is no reservation in transaction handle. | 3648 | * is no reservation in transaction handle. |
3647 | */ | 3649 | */ |
3648 | ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv, | 3650 | ret = btrfs_block_rsv_add(rc->extent_root, rc->block_rsv, |
3649 | rc->extent_root->nodesize * 256); | 3651 | rc->extent_root->nodesize * 256); |
3650 | if (ret) | 3652 | if (ret) |
3651 | return ret; | 3653 | return ret; |
3652 | 3654 | ||
3653 | rc->block_rsv->refill_used = 1; | ||
3654 | btrfs_add_durable_block_rsv(rc->extent_root->fs_info, rc->block_rsv); | ||
3655 | |||
3656 | memset(&rc->cluster, 0, sizeof(rc->cluster)); | 3655 | memset(&rc->cluster, 0, sizeof(rc->cluster)); |
3657 | rc->search_start = rc->block_group->key.objectid; | 3656 | rc->search_start = rc->block_group->key.objectid; |
3658 | rc->extents_found = 0; | 3657 | rc->extents_found = 0; |
@@ -3777,8 +3776,7 @@ restart: | |||
3777 | } | 3776 | } |
3778 | } | 3777 | } |
3779 | 3778 | ||
3780 | ret = btrfs_block_rsv_check(trans, rc->extent_root, | 3779 | ret = btrfs_block_rsv_check(rc->extent_root, rc->block_rsv, 5); |
3781 | rc->block_rsv, 0, 5); | ||
3782 | if (ret < 0) { | 3780 | if (ret < 0) { |
3783 | if (ret != -EAGAIN) { | 3781 | if (ret != -EAGAIN) { |
3784 | err = ret; | 3782 | err = ret; |
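The reservation calls in the relocation hunks above track a block-reserve API rework elsewhere in this series: btrfs_block_rsv_add() and btrfs_block_rsv_check() lose their trans-handle argument, and the check-and-refill case becomes btrfs_block_rsv_refill(). A condensed, hedged sketch of the refill-or-commit pattern merge_reloc_root() now uses (helper name and structure are mine, not from the patch):

	/*
	 * Sketch only: refill the reserve to a minimum; -EAGAIN means there
	 * was not enough space, so commit the transaction and let the
	 * caller retry with a fresh one.
	 */
	static int refill_or_commit(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root,
				    struct btrfs_block_rsv *rsv,
				    u64 min_reserved)
	{
		int ret;

		ret = btrfs_block_rsv_refill(root, rsv, min_reserved);
		if (ret == -EAGAIN)
			return btrfs_commit_transaction(trans, root);
		return ret;
	}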
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index eba42e5fd5fd..94cd3a19e9c8 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -33,15 +33,12 @@ | |||
33 | * any can be found. | 33 | * any can be found. |
34 | * | 34 | * |
35 | * Future enhancements: | 35 | * Future enhancements: |
36 | * - To enhance the performance, better read-ahead strategies for the | ||
37 | * extent-tree can be employed. | ||
38 | * - In case an unrepairable extent is encountered, track which files are | 36 | * - In case an unrepairable extent is encountered, track which files are |
39 | * affected and report them | 37 | * affected and report them |
40 | * - In case of a read error on files with nodatasum, map the file and read | 38 | * - In case of a read error on files with nodatasum, map the file and read |
41 | * the extent to trigger a writeback of the good copy | 39 | * the extent to trigger a writeback of the good copy |
42 | * - track and record media errors, throw out bad devices | 40 | * - track and record media errors, throw out bad devices |
43 | * - add a mode to also read unallocated space | 41 | * - add a mode to also read unallocated space |
44 | * - make the prefetch cancellable | ||
45 | */ | 42 | */ |
46 | 43 | ||
47 | struct scrub_bio; | 44 | struct scrub_bio; |
@@ -209,7 +206,7 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev) | |||
209 | atomic_set(&sdev->in_flight, 0); | 206 | atomic_set(&sdev->in_flight, 0); |
210 | atomic_set(&sdev->fixup_cnt, 0); | 207 | atomic_set(&sdev->fixup_cnt, 0); |
211 | atomic_set(&sdev->cancel_req, 0); | 208 | atomic_set(&sdev->cancel_req, 0); |
212 | sdev->csum_size = btrfs_super_csum_size(&fs_info->super_copy); | 209 | sdev->csum_size = btrfs_super_csum_size(fs_info->super_copy); |
213 | INIT_LIST_HEAD(&sdev->csum_list); | 210 | INIT_LIST_HEAD(&sdev->csum_list); |
214 | 211 | ||
215 | spin_lock_init(&sdev->list_lock); | 212 | spin_lock_init(&sdev->list_lock); |
@@ -1130,13 +1127,16 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, | |||
1130 | int slot; | 1127 | int slot; |
1131 | int i; | 1128 | int i; |
1132 | u64 nstripes; | 1129 | u64 nstripes; |
1133 | int start_stripe; | ||
1134 | struct extent_buffer *l; | 1130 | struct extent_buffer *l; |
1135 | struct btrfs_key key; | 1131 | struct btrfs_key key; |
1136 | u64 physical; | 1132 | u64 physical; |
1137 | u64 logical; | 1133 | u64 logical; |
1138 | u64 generation; | 1134 | u64 generation; |
1139 | int mirror_num; | 1135 | int mirror_num; |
1136 | struct reada_control *reada1; | ||
1137 | struct reada_control *reada2; | ||
1138 | struct btrfs_key key_start; | ||
1139 | struct btrfs_key key_end; | ||
1140 | 1140 | ||
1141 | u64 increment = map->stripe_len; | 1141 | u64 increment = map->stripe_len; |
1142 | u64 offset; | 1142 | u64 offset; |
@@ -1168,81 +1168,67 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, | |||
1168 | if (!path) | 1168 | if (!path) |
1169 | return -ENOMEM; | 1169 | return -ENOMEM; |
1170 | 1170 | ||
1171 | path->reada = 2; | ||
1172 | path->search_commit_root = 1; | 1171 | path->search_commit_root = 1; |
1173 | path->skip_locking = 1; | 1172 | path->skip_locking = 1; |
1174 | 1173 | ||
1175 | /* | 1174 | /* |
1176 | * find all extents for each stripe and just read them to get | 1175 | * trigger readahead for the extent tree and csum tree and wait |
1177 | * them into the page cache | 1176 | * for completion. During readahead, the scrub is officially paused |
1178 | * FIXME: we can do better. build a more intelligent prefetching | 1177 | * to not hold off transaction commits |
1179 | */ | 1178 | */ |
1180 | logical = base + offset; | 1179 | logical = base + offset; |
1181 | physical = map->stripes[num].physical; | ||
1182 | ret = 0; | ||
1183 | for (i = 0; i < nstripes; ++i) { | ||
1184 | key.objectid = logical; | ||
1185 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
1186 | key.offset = (u64)0; | ||
1187 | |||
1188 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
1189 | if (ret < 0) | ||
1190 | goto out_noplug; | ||
1191 | |||
1192 | /* | ||
1193 | * we might miss half an extent here, but that doesn't matter, | ||
1194 | * as it's only the prefetch | ||
1195 | */ | ||
1196 | while (1) { | ||
1197 | l = path->nodes[0]; | ||
1198 | slot = path->slots[0]; | ||
1199 | if (slot >= btrfs_header_nritems(l)) { | ||
1200 | ret = btrfs_next_leaf(root, path); | ||
1201 | if (ret == 0) | ||
1202 | continue; | ||
1203 | if (ret < 0) | ||
1204 | goto out_noplug; | ||
1205 | 1180 | ||
1206 | break; | 1181 | wait_event(sdev->list_wait, |
1207 | } | 1182 | atomic_read(&sdev->in_flight) == 0); |
1208 | btrfs_item_key_to_cpu(l, &key, slot); | 1183 | atomic_inc(&fs_info->scrubs_paused); |
1184 | wake_up(&fs_info->scrub_pause_wait); | ||
1209 | 1185 | ||
1210 | if (key.objectid >= logical + map->stripe_len) | 1186 | /* FIXME it might be better to start readahead at commit root */ |
1211 | break; | 1187 | key_start.objectid = logical; |
1188 | key_start.type = BTRFS_EXTENT_ITEM_KEY; | ||
1189 | key_start.offset = (u64)0; | ||
1190 | key_end.objectid = base + offset + nstripes * increment; | ||
1191 | key_end.type = BTRFS_EXTENT_ITEM_KEY; | ||
1192 | key_end.offset = (u64)0; | ||
1193 | reada1 = btrfs_reada_add(root, &key_start, &key_end); | ||
1194 | |||
1195 | key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID; | ||
1196 | key_start.type = BTRFS_EXTENT_CSUM_KEY; | ||
1197 | key_start.offset = logical; | ||
1198 | key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID; | ||
1199 | key_end.type = BTRFS_EXTENT_CSUM_KEY; | ||
1200 | key_end.offset = base + offset + nstripes * increment; | ||
1201 | reada2 = btrfs_reada_add(csum_root, &key_start, &key_end); | ||
1202 | |||
1203 | if (!IS_ERR(reada1)) | ||
1204 | btrfs_reada_wait(reada1); | ||
1205 | if (!IS_ERR(reada2)) | ||
1206 | btrfs_reada_wait(reada2); | ||
1212 | 1207 | ||
1213 | path->slots[0]++; | 1208 | mutex_lock(&fs_info->scrub_lock); |
1214 | } | 1209 | while (atomic_read(&fs_info->scrub_pause_req)) { |
1215 | btrfs_release_path(path); | 1210 | mutex_unlock(&fs_info->scrub_lock); |
1216 | logical += increment; | 1211 | wait_event(fs_info->scrub_pause_wait, |
1217 | physical += map->stripe_len; | 1212 | atomic_read(&fs_info->scrub_pause_req) == 0); |
1218 | cond_resched(); | 1213 | mutex_lock(&fs_info->scrub_lock); |
1219 | } | 1214 | } |
1215 | atomic_dec(&fs_info->scrubs_paused); | ||
1216 | mutex_unlock(&fs_info->scrub_lock); | ||
1217 | wake_up(&fs_info->scrub_pause_wait); | ||
1220 | 1218 | ||
1221 | /* | 1219 | /* |
1222 | * collect all data csums for the stripe to avoid seeking during | 1220 | * collect all data csums for the stripe to avoid seeking during |
1223 | * the scrub. This might currently (crc32) end up to be about 1MB | 1221 | * the scrub. This might currently (crc32) end up to be about 1MB |
1224 | */ | 1222 | */ |
1225 | start_stripe = 0; | ||
1226 | blk_start_plug(&plug); | 1223 | blk_start_plug(&plug); |
1227 | again: | ||
1228 | logical = base + offset + start_stripe * increment; | ||
1229 | for (i = start_stripe; i < nstripes; ++i) { | ||
1230 | ret = btrfs_lookup_csums_range(csum_root, logical, | ||
1231 | logical + map->stripe_len - 1, | ||
1232 | &sdev->csum_list, 1); | ||
1233 | if (ret) | ||
1234 | goto out; | ||
1235 | 1224 | ||
1236 | logical += increment; | ||
1237 | cond_resched(); | ||
1238 | } | ||
1239 | /* | 1225 | /* |
1240 | * now find all extents for each stripe and scrub them | 1226 | * now find all extents for each stripe and scrub them |
1241 | */ | 1227 | */ |
1242 | logical = base + offset + start_stripe * increment; | 1228 | logical = base + offset; |
1243 | physical = map->stripes[num].physical + start_stripe * map->stripe_len; | 1229 | physical = map->stripes[num].physical; |
1244 | ret = 0; | 1230 | ret = 0; |
1245 | for (i = start_stripe; i < nstripes; ++i) { | 1231 | for (i = 0; i < nstripes; ++i) { |
1246 | /* | 1232 | /* |
1247 | * canceled? | 1233 | * canceled? |
1248 | */ | 1234 | */ |
@@ -1271,11 +1257,14 @@ again: | |||
1271 | atomic_dec(&fs_info->scrubs_paused); | 1257 | atomic_dec(&fs_info->scrubs_paused); |
1272 | mutex_unlock(&fs_info->scrub_lock); | 1258 | mutex_unlock(&fs_info->scrub_lock); |
1273 | wake_up(&fs_info->scrub_pause_wait); | 1259 | wake_up(&fs_info->scrub_pause_wait); |
1274 | scrub_free_csums(sdev); | ||
1275 | start_stripe = i; | ||
1276 | goto again; | ||
1277 | } | 1260 | } |
1278 | 1261 | ||
1262 | ret = btrfs_lookup_csums_range(csum_root, logical, | ||
1263 | logical + map->stripe_len - 1, | ||
1264 | &sdev->csum_list, 1); | ||
1265 | if (ret) | ||
1266 | goto out; | ||
1267 | |||
1279 | key.objectid = logical; | 1268 | key.objectid = logical; |
1280 | key.type = BTRFS_EXTENT_ITEM_KEY; | 1269 | key.type = BTRFS_EXTENT_ITEM_KEY; |
1281 | key.offset = (u64)0; | 1270 | key.offset = (u64)0; |
@@ -1371,7 +1360,6 @@ next: | |||
1371 | 1360 | ||
1372 | out: | 1361 | out: |
1373 | blk_finish_plug(&plug); | 1362 | blk_finish_plug(&plug); |
1374 | out_noplug: | ||
1375 | btrfs_free_path(path); | 1363 | btrfs_free_path(path); |
1376 | return ret < 0 ? ret : 0; | 1364 | return ret < 0 ? ret : 0; |
1377 | } | 1365 | } |
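Scrub now pauses itself for the duration of the readahead so it does not hold off transaction commits; the handshake is the same scrubs_paused/scrub_pause_req protocol scrub already uses at commit time. A condensed sketch of that handshake, using the names from the hunk above (the wrapper function itself is hypothetical):

	/* condensed sketch of the scrub pause protocol used above */
	static void scrub_pause_around_reada(struct btrfs_fs_info *fs_info,
					     struct scrub_dev *sdev)
	{
		/* quiesce our own I/O, then announce the pause */
		wait_event(sdev->list_wait,
			   atomic_read(&sdev->in_flight) == 0);
		atomic_inc(&fs_info->scrubs_paused);
		wake_up(&fs_info->scrub_pause_wait);

		/* ... readahead or other commit-friendly work here ... */

		/* wait until nobody requests a pause anymore, then resume */
		mutex_lock(&fs_info->scrub_lock);
		while (atomic_read(&fs_info->scrub_pause_req)) {
			mutex_unlock(&fs_info->scrub_lock);
			wait_event(fs_info->scrub_pause_wait,
				   atomic_read(&fs_info->scrub_pause_req) == 0);
			mutex_lock(&fs_info->scrub_lock);
		}
		atomic_dec(&fs_info->scrubs_paused);
		mutex_unlock(&fs_info->scrub_lock);
		wake_up(&fs_info->scrub_pause_wait);
	}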
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 15634d4648d7..57080dffdfc6 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/magic.h> | 40 | #include <linux/magic.h> |
41 | #include <linux/slab.h> | 41 | #include <linux/slab.h> |
42 | #include <linux/cleancache.h> | 42 | #include <linux/cleancache.h> |
43 | #include <linux/mnt_namespace.h> | ||
43 | #include "compat.h" | 44 | #include "compat.h" |
44 | #include "delayed-inode.h" | 45 | #include "delayed-inode.h" |
45 | #include "ctree.h" | 46 | #include "ctree.h" |
@@ -58,6 +59,7 @@ | |||
58 | #include <trace/events/btrfs.h> | 59 | #include <trace/events/btrfs.h> |
59 | 60 | ||
60 | static const struct super_operations btrfs_super_ops; | 61 | static const struct super_operations btrfs_super_ops; |
62 | static struct file_system_type btrfs_fs_type; | ||
61 | 63 | ||
62 | static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, | 64 | static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, |
63 | char nbuf[16]) | 65 | char nbuf[16]) |
@@ -162,7 +164,7 @@ enum { | |||
162 | Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, | 164 | Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, |
163 | Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, | 165 | Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, |
164 | Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, | 166 | Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, |
165 | Opt_inode_cache, Opt_err, | 167 | Opt_inode_cache, Opt_no_space_cache, Opt_recovery, Opt_err, |
166 | }; | 168 | }; |
167 | 169 | ||
168 | static match_table_t tokens = { | 170 | static match_table_t tokens = { |
@@ -195,6 +197,8 @@ static match_table_t tokens = { | |||
195 | {Opt_subvolrootid, "subvolrootid=%d"}, | 197 | {Opt_subvolrootid, "subvolrootid=%d"}, |
196 | {Opt_defrag, "autodefrag"}, | 198 | {Opt_defrag, "autodefrag"}, |
197 | {Opt_inode_cache, "inode_cache"}, | 199 | {Opt_inode_cache, "inode_cache"}, |
200 | {Opt_no_space_cache, "no_space_cache"}, | ||
201 | {Opt_recovery, "recovery"}, | ||
198 | {Opt_err, NULL}, | 202 | {Opt_err, NULL}, |
199 | }; | 203 | }; |
200 | 204 | ||
@@ -206,14 +210,19 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
206 | { | 210 | { |
207 | struct btrfs_fs_info *info = root->fs_info; | 211 | struct btrfs_fs_info *info = root->fs_info; |
208 | substring_t args[MAX_OPT_ARGS]; | 212 | substring_t args[MAX_OPT_ARGS]; |
209 | char *p, *num, *orig; | 213 | char *p, *num, *orig = NULL; |
214 | u64 cache_gen; | ||
210 | int intarg; | 215 | int intarg; |
211 | int ret = 0; | 216 | int ret = 0; |
212 | char *compress_type; | 217 | char *compress_type; |
213 | bool compress_force = false; | 218 | bool compress_force = false; |
214 | 219 | ||
220 | cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy); | ||
221 | if (cache_gen) | ||
222 | btrfs_set_opt(info->mount_opt, SPACE_CACHE); | ||
223 | |||
215 | if (!options) | 224 | if (!options) |
216 | return 0; | 225 | goto out; |
217 | 226 | ||
218 | /* | 227 | /* |
219 | * strsep changes the string, duplicate it because parse_options | 228 | * strsep changes the string, duplicate it because parse_options |
@@ -360,9 +369,12 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
360 | btrfs_set_opt(info->mount_opt, DISCARD); | 369 | btrfs_set_opt(info->mount_opt, DISCARD); |
361 | break; | 370 | break; |
362 | case Opt_space_cache: | 371 | case Opt_space_cache: |
363 | printk(KERN_INFO "btrfs: enabling disk space caching\n"); | ||
364 | btrfs_set_opt(info->mount_opt, SPACE_CACHE); | 372 | btrfs_set_opt(info->mount_opt, SPACE_CACHE); |
365 | break; | 373 | break; |
374 | case Opt_no_space_cache: | ||
375 | printk(KERN_INFO "btrfs: disabling disk space caching\n"); | ||
376 | btrfs_clear_opt(info->mount_opt, SPACE_CACHE); | ||
377 | break; | ||
366 | case Opt_inode_cache: | 378 | case Opt_inode_cache: |
367 | printk(KERN_INFO "btrfs: enabling inode map caching\n"); | 379 | printk(KERN_INFO "btrfs: enabling inode map caching\n"); |
368 | btrfs_set_opt(info->mount_opt, INODE_MAP_CACHE); | 380 | btrfs_set_opt(info->mount_opt, INODE_MAP_CACHE); |
@@ -381,6 +393,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
381 | printk(KERN_INFO "btrfs: enabling auto defrag"); | 393 | printk(KERN_INFO "btrfs: enabling auto defrag"); |
382 | btrfs_set_opt(info->mount_opt, AUTO_DEFRAG); | 394 | btrfs_set_opt(info->mount_opt, AUTO_DEFRAG); |
383 | break; | 395 | break; |
396 | case Opt_recovery: | ||
397 | printk(KERN_INFO "btrfs: enabling auto recovery"); | ||
398 | btrfs_set_opt(info->mount_opt, RECOVERY); | ||
399 | break; | ||
384 | case Opt_err: | 400 | case Opt_err: |
385 | printk(KERN_INFO "btrfs: unrecognized mount option " | 401 | printk(KERN_INFO "btrfs: unrecognized mount option " |
386 | "'%s'\n", p); | 402 | "'%s'\n", p); |
@@ -391,6 +407,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
391 | } | 407 | } |
392 | } | 408 | } |
393 | out: | 409 | out: |
410 | if (!ret && btrfs_test_opt(root, SPACE_CACHE)) | ||
411 | printk(KERN_INFO "btrfs: disk space caching is enabled\n"); | ||
394 | kfree(orig); | 412 | kfree(orig); |
395 | return ret; | 413 | return ret; |
396 | } | 414 | } |
@@ -406,12 +424,12 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, | |||
406 | u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices) | 424 | u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices) |
407 | { | 425 | { |
408 | substring_t args[MAX_OPT_ARGS]; | 426 | substring_t args[MAX_OPT_ARGS]; |
409 | char *opts, *orig, *p; | 427 | char *device_name, *opts, *orig, *p; |
410 | int error = 0; | 428 | int error = 0; |
411 | int intarg; | 429 | int intarg; |
412 | 430 | ||
413 | if (!options) | 431 | if (!options) |
414 | goto out; | 432 | return 0; |
415 | 433 | ||
416 | /* | 434 | /* |
417 | * strsep changes the string, duplicate it because parse_options | 435 | * strsep changes the string, duplicate it because parse_options |
@@ -457,29 +475,24 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, | |||
457 | } | 475 | } |
458 | break; | 476 | break; |
459 | case Opt_device: | 477 | case Opt_device: |
460 | error = btrfs_scan_one_device(match_strdup(&args[0]), | 478 | device_name = match_strdup(&args[0]); |
479 | if (!device_name) { | ||
480 | error = -ENOMEM; | ||
481 | goto out; | ||
482 | } | ||
483 | error = btrfs_scan_one_device(device_name, | ||
461 | flags, holder, fs_devices); | 484 | flags, holder, fs_devices); |
485 | kfree(device_name); | ||
462 | if (error) | 486 | if (error) |
463 | goto out_free_opts; | 487 | goto out; |
464 | break; | 488 | break; |
465 | default: | 489 | default: |
466 | break; | 490 | break; |
467 | } | 491 | } |
468 | } | 492 | } |
469 | 493 | ||
470 | out_free_opts: | 494 | out: |
471 | kfree(orig); | 495 | kfree(orig); |
472 | out: | ||
473 | /* | ||
474 | * If no subvolume name is specified we use the default one. Allocate | ||
475 | * a copy of the string "." here so that code later in the | ||
476 | * mount path doesn't care if it's the default volume or another one. | ||
477 | */ | ||
478 | if (!*subvol_name) { | ||
479 | *subvol_name = kstrdup(".", GFP_KERNEL); | ||
480 | if (!*subvol_name) | ||
481 | return -ENOMEM; | ||
482 | } | ||
483 | return error; | 496 | return error; |
484 | } | 497 | } |
485 | 498 | ||
@@ -492,7 +505,6 @@ static struct dentry *get_default_root(struct super_block *sb, | |||
492 | struct btrfs_path *path; | 505 | struct btrfs_path *path; |
493 | struct btrfs_key location; | 506 | struct btrfs_key location; |
494 | struct inode *inode; | 507 | struct inode *inode; |
495 | struct dentry *dentry; | ||
496 | u64 dir_id; | 508 | u64 dir_id; |
497 | int new = 0; | 509 | int new = 0; |
498 | 510 | ||
@@ -517,7 +529,7 @@ static struct dentry *get_default_root(struct super_block *sb, | |||
517 | * will mount by default if we haven't been given a specific subvolume | 529 | * will mount by default if we haven't been given a specific subvolume |
518 | * to mount. | 530 | * to mount. |
519 | */ | 531 | */ |
520 | dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); | 532 | dir_id = btrfs_super_root_dir(root->fs_info->super_copy); |
521 | di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0); | 533 | di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0); |
522 | if (IS_ERR(di)) { | 534 | if (IS_ERR(di)) { |
523 | btrfs_free_path(path); | 535 | btrfs_free_path(path); |
@@ -566,29 +578,7 @@ setup_root: | |||
566 | return dget(sb->s_root); | 578 | return dget(sb->s_root); |
567 | } | 579 | } |
568 | 580 | ||
569 | if (new) { | 581 | return d_obtain_alias(inode); |
570 | const struct qstr name = { .name = "/", .len = 1 }; | ||
571 | |||
572 | /* | ||
573 | * New inode, we need to make the dentry a sibling of s_root so | ||
574 | * everything gets cleaned up properly on unmount. | ||
575 | */ | ||
576 | dentry = d_alloc(sb->s_root, &name); | ||
577 | if (!dentry) { | ||
578 | iput(inode); | ||
579 | return ERR_PTR(-ENOMEM); | ||
580 | } | ||
581 | d_splice_alias(inode, dentry); | ||
582 | } else { | ||
583 | /* | ||
584 | * We found the inode in cache, just find a dentry for it and | ||
585 | * put the reference to the inode we just got. | ||
586 | */ | ||
587 | dentry = d_find_alias(inode); | ||
588 | iput(inode); | ||
589 | } | ||
590 | |||
591 | return dentry; | ||
592 | } | 582 | } |
593 | 583 | ||
594 | static int btrfs_fill_super(struct super_block *sb, | 584 | static int btrfs_fill_super(struct super_block *sb, |
@@ -719,6 +709,8 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
719 | seq_puts(seq, ",noacl"); | 709 | seq_puts(seq, ",noacl"); |
720 | if (btrfs_test_opt(root, SPACE_CACHE)) | 710 | if (btrfs_test_opt(root, SPACE_CACHE)) |
721 | seq_puts(seq, ",space_cache"); | 711 | seq_puts(seq, ",space_cache"); |
712 | else | ||
713 | seq_puts(seq, ",no_space_cache"); | ||
722 | if (btrfs_test_opt(root, CLEAR_CACHE)) | 714 | if (btrfs_test_opt(root, CLEAR_CACHE)) |
723 | seq_puts(seq, ",clear_cache"); | 715 | seq_puts(seq, ",clear_cache"); |
724 | if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) | 716 | if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) |
@@ -753,6 +745,137 @@ static int btrfs_set_super(struct super_block *s, void *data) | |||
753 | return set_anon_super(s, data); | 745 | return set_anon_super(s, data); |
754 | } | 746 | } |
755 | 747 | ||
748 | /* | ||
749 | * subvolumes are identified by ino 256 | ||
750 | */ | ||
751 | static inline int is_subvolume_inode(struct inode *inode) | ||
752 | { | ||
753 | if (inode && inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | ||
754 | return 1; | ||
755 | return 0; | ||
756 | } | ||
757 | |||
758 | /* | ||
759 | * This will strip out the subvol=%s argument for an argument string and add | ||
760 | * subvolid=0 to make sure we get the actual tree root for path walking to the | ||
761 | * subvol we want. | ||
762 | */ | ||
763 | static char *setup_root_args(char *args) | ||
764 | { | ||
765 | unsigned copied = 0; | ||
766 | unsigned len = strlen(args) + 2; | ||
767 | char *pos; | ||
768 | char *ret; | ||
769 | |||
770 | /* | ||
771 | * We need the same args as before, but minus | ||
772 | * | ||
773 | * subvol=a | ||
774 | * | ||
775 | * and add | ||
776 | * | ||
777 | * subvolid=0 | ||
778 | * | ||
779 | * which is a difference of 2 characters, so we allocate strlen(args) + | ||
780 | * 2 characters. | ||
781 | */ | ||
782 | ret = kzalloc(len, GFP_NOFS); | ||
783 | if (!ret) | ||
784 | return NULL; | ||
785 | pos = strstr(args, "subvol="); | ||
786 | |||
787 | /* This shouldn't happen, but just in case.. */ | ||
788 | if (!pos) { | ||
789 | kfree(ret); | ||
790 | return NULL; | ||
791 | } | ||
792 | |||
793 | /* | ||
794 | * The subvol=<> arg is not at the front of the string, copy everybody | ||
795 | * up to that into ret. | ||
796 | */ | ||
797 | if (pos != args) { | ||
798 | *pos = '\0'; | ||
799 | strcpy(ret, args); | ||
800 | copied += strlen(args); | ||
801 | pos++; | ||
802 | } | ||
803 | |||
804 | strncpy(ret + copied, "subvolid=0", len - copied); | ||
805 | |||
806 | /* Length of subvolid=0 */ | ||
807 | copied += 10; | ||
808 | |||
809 | /* | ||
810 | * If there is no , after the subvol= option then we know there's no | ||
811 | * other options and we can just return. | ||
812 | */ | ||
813 | pos = strchr(pos, ','); | ||
814 | if (!pos) | ||
815 | return ret; | ||
816 | |||
817 | /* Copy the rest of the arguments into our buffer */ | ||
818 | strncpy(ret + copied, pos, len - copied); | ||
819 | copied += strlen(pos); | ||
820 | |||
821 | return ret; | ||
822 | } | ||
823 | |||
824 | static struct dentry *mount_subvol(const char *subvol_name, int flags, | ||
825 | const char *device_name, char *data) | ||
826 | { | ||
827 | struct super_block *s; | ||
828 | struct dentry *root; | ||
829 | struct vfsmount *mnt; | ||
830 | struct mnt_namespace *ns_private; | ||
831 | char *newargs; | ||
832 | struct path path; | ||
833 | int error; | ||
834 | |||
835 | newargs = setup_root_args(data); | ||
836 | if (!newargs) | ||
837 | return ERR_PTR(-ENOMEM); | ||
838 | mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, | ||
839 | newargs); | ||
840 | kfree(newargs); | ||
841 | if (IS_ERR(mnt)) | ||
842 | return ERR_CAST(mnt); | ||
843 | |||
844 | ns_private = create_mnt_ns(mnt); | ||
845 | if (IS_ERR(ns_private)) { | ||
846 | mntput(mnt); | ||
847 | return ERR_CAST(ns_private); | ||
848 | } | ||
849 | |||
850 | /* | ||
851 | * This will trigger the automount of the subvol so we can just | ||
852 | * drop the mnt we have here and return the dentry that we | ||
853 | * found. | ||
854 | */ | ||
855 | error = vfs_path_lookup(mnt->mnt_root, mnt, subvol_name, | ||
856 | LOOKUP_FOLLOW, &path); | ||
857 | put_mnt_ns(ns_private); | ||
858 | if (error) | ||
859 | return ERR_PTR(error); | ||
860 | |||
861 | if (!is_subvolume_inode(path.dentry->d_inode)) { | ||
862 | path_put(&path); | ||
863 | printk(KERN_ERR "btrfs: '%s' is not a valid subvolume\n", | ||
864 | subvol_name); | ||
865 | return ERR_PTR(-EINVAL); | ||
868 | } | ||
869 | |||
870 | /* Get a ref to the sb and the dentry we found and return it */ | ||
871 | s = path.mnt->mnt_sb; | ||
872 | atomic_inc(&s->s_active); | ||
873 | root = dget(path.dentry); | ||
874 | path_put(&path); | ||
875 | down_write(&s->s_umount); | ||
876 | |||
877 | return root; | ||
878 | } | ||
756 | 879 | ||
757 | /* | 880 | /* |
758 | * Find a superblock for the given device / mount point. | 881 | * Find a superblock for the given device / mount point. |
@@ -784,13 +907,19 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
784 | if (error) | 907 | if (error) |
785 | return ERR_PTR(error); | 908 | return ERR_PTR(error); |
786 | 909 | ||
910 | if (subvol_name) { | ||
911 | root = mount_subvol(subvol_name, flags, device_name, data); | ||
912 | kfree(subvol_name); | ||
913 | return root; | ||
914 | } | ||
915 | |||
787 | error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices); | 916 | error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices); |
788 | if (error) | 917 | if (error) |
789 | goto error_free_subvol_name; | 918 | return ERR_PTR(error); |
790 | 919 | ||
791 | error = btrfs_open_devices(fs_devices, mode, fs_type); | 920 | error = btrfs_open_devices(fs_devices, mode, fs_type); |
792 | if (error) | 921 | if (error) |
793 | goto error_free_subvol_name; | 922 | return ERR_PTR(error); |
794 | 923 | ||
795 | if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) { | 924 | if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) { |
796 | error = -EACCES; | 925 | error = -EACCES; |
@@ -813,88 +942,57 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
813 | fs_info->fs_devices = fs_devices; | 942 | fs_info->fs_devices = fs_devices; |
814 | tree_root->fs_info = fs_info; | 943 | tree_root->fs_info = fs_info; |
815 | 944 | ||
945 | fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); | ||
946 | fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); | ||
947 | if (!fs_info->super_copy || !fs_info->super_for_commit) { | ||
948 | error = -ENOMEM; | ||
949 | goto error_close_devices; | ||
950 | } | ||
951 | |||
816 | bdev = fs_devices->latest_bdev; | 952 | bdev = fs_devices->latest_bdev; |
817 | s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root); | 953 | s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root); |
818 | if (IS_ERR(s)) | 954 | if (IS_ERR(s)) { |
819 | goto error_s; | 955 | error = PTR_ERR(s); |
956 | goto error_close_devices; | ||
957 | } | ||
820 | 958 | ||
821 | if (s->s_root) { | 959 | if (s->s_root) { |
822 | if ((flags ^ s->s_flags) & MS_RDONLY) { | 960 | if ((flags ^ s->s_flags) & MS_RDONLY) { |
823 | deactivate_locked_super(s); | 961 | deactivate_locked_super(s); |
824 | error = -EBUSY; | 962 | return ERR_PTR(-EBUSY); |
825 | goto error_close_devices; | ||
826 | } | 963 | } |
827 | 964 | ||
828 | btrfs_close_devices(fs_devices); | 965 | btrfs_close_devices(fs_devices); |
829 | kfree(fs_info); | 966 | free_fs_info(fs_info); |
830 | kfree(tree_root); | 967 | kfree(tree_root); |
831 | } else { | 968 | } else { |
832 | char b[BDEVNAME_SIZE]; | 969 | char b[BDEVNAME_SIZE]; |
833 | 970 | ||
834 | s->s_flags = flags | MS_NOSEC; | 971 | s->s_flags = flags | MS_NOSEC; |
835 | strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); | 972 | strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); |
973 | btrfs_sb(s)->fs_info->bdev_holder = fs_type; | ||
836 | error = btrfs_fill_super(s, fs_devices, data, | 974 | error = btrfs_fill_super(s, fs_devices, data, |
837 | flags & MS_SILENT ? 1 : 0); | 975 | flags & MS_SILENT ? 1 : 0); |
838 | if (error) { | 976 | if (error) { |
839 | deactivate_locked_super(s); | 977 | deactivate_locked_super(s); |
840 | goto error_free_subvol_name; | 978 | return ERR_PTR(error); |
841 | } | 979 | } |
842 | 980 | ||
843 | btrfs_sb(s)->fs_info->bdev_holder = fs_type; | ||
844 | s->s_flags |= MS_ACTIVE; | 981 | s->s_flags |= MS_ACTIVE; |
845 | } | 982 | } |
846 | 983 | ||
847 | /* if they gave us a subvolume name bind mount into that */ | 984 | root = get_default_root(s, subvol_objectid); |
848 | if (strcmp(subvol_name, ".")) { | 985 | if (IS_ERR(root)) { |
849 | struct dentry *new_root; | 986 | deactivate_locked_super(s); |
850 | 987 | return root; | |
851 | root = get_default_root(s, subvol_rootid); | ||
852 | if (IS_ERR(root)) { | ||
853 | error = PTR_ERR(root); | ||
854 | deactivate_locked_super(s); | ||
855 | goto error_free_subvol_name; | ||
856 | } | ||
857 | |||
858 | mutex_lock(&root->d_inode->i_mutex); | ||
859 | new_root = lookup_one_len(subvol_name, root, | ||
860 | strlen(subvol_name)); | ||
861 | mutex_unlock(&root->d_inode->i_mutex); | ||
862 | |||
863 | if (IS_ERR(new_root)) { | ||
864 | dput(root); | ||
865 | deactivate_locked_super(s); | ||
866 | error = PTR_ERR(new_root); | ||
867 | goto error_free_subvol_name; | ||
868 | } | ||
869 | if (!new_root->d_inode) { | ||
870 | dput(root); | ||
871 | dput(new_root); | ||
872 | deactivate_locked_super(s); | ||
873 | error = -ENXIO; | ||
874 | goto error_free_subvol_name; | ||
875 | } | ||
876 | dput(root); | ||
877 | root = new_root; | ||
878 | } else { | ||
879 | root = get_default_root(s, subvol_objectid); | ||
880 | if (IS_ERR(root)) { | ||
881 | error = PTR_ERR(root); | ||
882 | deactivate_locked_super(s); | ||
883 | goto error_free_subvol_name; | ||
884 | } | ||
885 | } | 988 | } |
886 | 989 | ||
887 | kfree(subvol_name); | ||
888 | return root; | 990 | return root; |
889 | 991 | ||
890 | error_s: | ||
891 | error = PTR_ERR(s); | ||
892 | error_close_devices: | 992 | error_close_devices: |
893 | btrfs_close_devices(fs_devices); | 993 | btrfs_close_devices(fs_devices); |
894 | kfree(fs_info); | 994 | free_fs_info(fs_info); |
895 | kfree(tree_root); | 995 | kfree(tree_root); |
896 | error_free_subvol_name: | ||
897 | kfree(subvol_name); | ||
898 | return ERR_PTR(error); | 996 | return ERR_PTR(error); |
899 | } | 997 | } |
900 | 998 | ||
@@ -919,7 +1017,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
919 | if (root->fs_info->fs_devices->rw_devices == 0) | 1017 | if (root->fs_info->fs_devices->rw_devices == 0) |
920 | return -EACCES; | 1018 | return -EACCES; |
921 | 1019 | ||
922 | if (btrfs_super_log_root(&root->fs_info->super_copy) != 0) | 1020 | if (btrfs_super_log_root(root->fs_info->super_copy) != 0) |
923 | return -EINVAL; | 1021 | return -EINVAL; |
924 | 1022 | ||
925 | ret = btrfs_cleanup_fs_roots(root->fs_info); | 1023 | ret = btrfs_cleanup_fs_roots(root->fs_info); |
@@ -1085,7 +1183,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) | |||
1085 | static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 1183 | static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) |
1086 | { | 1184 | { |
1087 | struct btrfs_root *root = btrfs_sb(dentry->d_sb); | 1185 | struct btrfs_root *root = btrfs_sb(dentry->d_sb); |
1088 | struct btrfs_super_block *disk_super = &root->fs_info->super_copy; | 1186 | struct btrfs_super_block *disk_super = root->fs_info->super_copy; |
1089 | struct list_head *head = &root->fs_info->space_info; | 1187 | struct list_head *head = &root->fs_info->space_info; |
1090 | struct btrfs_space_info *found; | 1188 | struct btrfs_space_info *found; |
1091 | u64 total_used = 0; | 1189 | u64 total_used = 0; |
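The option rewriting done by setup_root_args() in the super.c hunk above is easiest to see on a concrete string; the mount options below are hypothetical, chosen only to show the substitution:

	/* hypothetical input/output for setup_root_args() */
	static void setup_root_args_example(void)
	{
		char args[] = "noatime,subvol=snap1,compress=lzo";
		char *newargs = setup_root_args(args);

		/*
		 * newargs == "noatime,subvolid=0,compress=lzo": the subvol=
		 * option is stripped and subvolid=0 substituted so the path
		 * walk starts at the real tree root. Note that args itself
		 * is truncated in place at "subvol".
		 */
		kfree(newargs);
	}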
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index e24b7964a155..29f782cc2cc9 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -275,7 +275,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
275 | */ | 275 | */ |
276 | if (num_items > 0 && root != root->fs_info->chunk_root) { | 276 | if (num_items > 0 && root != root->fs_info->chunk_root) { |
277 | num_bytes = btrfs_calc_trans_metadata_size(root, num_items); | 277 | num_bytes = btrfs_calc_trans_metadata_size(root, num_items); |
278 | ret = btrfs_block_rsv_add(NULL, root, | 278 | ret = btrfs_block_rsv_add(root, |
279 | &root->fs_info->trans_block_rsv, | 279 | &root->fs_info->trans_block_rsv, |
280 | num_bytes); | 280 | num_bytes); |
281 | if (ret) | 281 | if (ret) |
@@ -418,8 +418,8 @@ static int should_end_transaction(struct btrfs_trans_handle *trans, | |||
418 | struct btrfs_root *root) | 418 | struct btrfs_root *root) |
419 | { | 419 | { |
420 | int ret; | 420 | int ret; |
421 | ret = btrfs_block_rsv_check(trans, root, | 421 | |
422 | &root->fs_info->global_block_rsv, 0, 5); | 422 | ret = btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5); |
423 | return ret ? 1 : 0; | 423 | return ret ? 1 : 0; |
424 | } | 424 | } |
425 | 425 | ||
@@ -427,17 +427,26 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | |||
427 | struct btrfs_root *root) | 427 | struct btrfs_root *root) |
428 | { | 428 | { |
429 | struct btrfs_transaction *cur_trans = trans->transaction; | 429 | struct btrfs_transaction *cur_trans = trans->transaction; |
430 | struct btrfs_block_rsv *rsv = trans->block_rsv; | ||
430 | int updates; | 431 | int updates; |
431 | 432 | ||
432 | smp_mb(); | 433 | smp_mb(); |
433 | if (cur_trans->blocked || cur_trans->delayed_refs.flushing) | 434 | if (cur_trans->blocked || cur_trans->delayed_refs.flushing) |
434 | return 1; | 435 | return 1; |
435 | 436 | ||
437 | /* | ||
438 | * We need to do this in case we're deleting csums so the global block | ||
439 | * rsv gets used instead of the csum block rsv. | ||
440 | */ | ||
441 | trans->block_rsv = NULL; | ||
442 | |||
436 | updates = trans->delayed_ref_updates; | 443 | updates = trans->delayed_ref_updates; |
437 | trans->delayed_ref_updates = 0; | 444 | trans->delayed_ref_updates = 0; |
438 | if (updates) | 445 | if (updates) |
439 | btrfs_run_delayed_refs(trans, root, updates); | 446 | btrfs_run_delayed_refs(trans, root, updates); |
440 | 447 | ||
448 | trans->block_rsv = rsv; | ||
449 | |||
441 | return should_end_transaction(trans, root); | 450 | return should_end_transaction(trans, root); |
442 | } | 451 | } |
443 | 452 | ||
@@ -453,6 +462,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
453 | return 0; | 462 | return 0; |
454 | } | 463 | } |
455 | 464 | ||
465 | btrfs_trans_release_metadata(trans, root); | ||
466 | trans->block_rsv = NULL; | ||
456 | while (count < 4) { | 467 | while (count < 4) { |
457 | unsigned long cur = trans->delayed_ref_updates; | 468 | unsigned long cur = trans->delayed_ref_updates; |
458 | trans->delayed_ref_updates = 0; | 469 | trans->delayed_ref_updates = 0; |
@@ -473,8 +484,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
473 | count++; | 484 | count++; |
474 | } | 485 | } |
475 | 486 | ||
476 | btrfs_trans_release_metadata(trans, root); | ||
477 | |||
478 | if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && | 487 | if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && |
479 | should_end_transaction(trans, root)) { | 488 | should_end_transaction(trans, root)) { |
480 | trans->transaction->blocked = 1; | 489 | trans->transaction->blocked = 1; |
@@ -562,50 +571,21 @@ int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans, | |||
562 | int btrfs_write_marked_extents(struct btrfs_root *root, | 571 | int btrfs_write_marked_extents(struct btrfs_root *root, |
563 | struct extent_io_tree *dirty_pages, int mark) | 572 | struct extent_io_tree *dirty_pages, int mark) |
564 | { | 573 | { |
565 | int ret; | ||
566 | int err = 0; | 574 | int err = 0; |
567 | int werr = 0; | 575 | int werr = 0; |
568 | struct page *page; | 576 | struct address_space *mapping = root->fs_info->btree_inode->i_mapping; |
569 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
570 | u64 start = 0; | 577 | u64 start = 0; |
571 | u64 end; | 578 | u64 end; |
572 | unsigned long index; | ||
573 | 579 | ||
574 | while (1) { | 580 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, |
575 | ret = find_first_extent_bit(dirty_pages, start, &start, &end, | 581 | mark)) { |
576 | mark); | 582 | convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, mark, |
577 | if (ret) | 583 | GFP_NOFS); |
578 | break; | 584 | err = filemap_fdatawrite_range(mapping, start, end); |
579 | while (start <= end) { | 585 | if (err) |
580 | cond_resched(); | 586 | werr = err; |
581 | 587 | cond_resched(); | |
582 | index = start >> PAGE_CACHE_SHIFT; | 588 | start = end + 1; |
583 | start = (u64)(index + 1) << PAGE_CACHE_SHIFT; | ||
584 | page = find_get_page(btree_inode->i_mapping, index); | ||
585 | if (!page) | ||
586 | continue; | ||
587 | |||
588 | btree_lock_page_hook(page); | ||
589 | if (!page->mapping) { | ||
590 | unlock_page(page); | ||
591 | page_cache_release(page); | ||
592 | continue; | ||
593 | } | ||
594 | |||
595 | if (PageWriteback(page)) { | ||
596 | if (PageDirty(page)) | ||
597 | wait_on_page_writeback(page); | ||
598 | else { | ||
599 | unlock_page(page); | ||
600 | page_cache_release(page); | ||
601 | continue; | ||
602 | } | ||
603 | } | ||
604 | err = write_one_page(page, 0); | ||
605 | if (err) | ||
606 | werr = err; | ||
607 | page_cache_release(page); | ||
608 | } | ||
609 | } | 589 | } |
610 | if (err) | 590 | if (err) |
611 | werr = err; | 591 | werr = err; |
@@ -621,39 +601,20 @@ int btrfs_write_marked_extents(struct btrfs_root *root, | |||
621 | int btrfs_wait_marked_extents(struct btrfs_root *root, | 601 | int btrfs_wait_marked_extents(struct btrfs_root *root, |
622 | struct extent_io_tree *dirty_pages, int mark) | 602 | struct extent_io_tree *dirty_pages, int mark) |
623 | { | 603 | { |
624 | int ret; | ||
625 | int err = 0; | 604 | int err = 0; |
626 | int werr = 0; | 605 | int werr = 0; |
627 | struct page *page; | 606 | struct address_space *mapping = root->fs_info->btree_inode->i_mapping; |
628 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
629 | u64 start = 0; | 607 | u64 start = 0; |
630 | u64 end; | 608 | u64 end; |
631 | unsigned long index; | ||
632 | 609 | ||
633 | while (1) { | 610 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, |
634 | ret = find_first_extent_bit(dirty_pages, start, &start, &end, | 611 | EXTENT_NEED_WAIT)) { |
635 | mark); | 612 | clear_extent_bits(dirty_pages, start, end, EXTENT_NEED_WAIT, GFP_NOFS); |
636 | if (ret) | 613 | err = filemap_fdatawait_range(mapping, start, end); |
637 | break; | 614 | if (err) |
638 | 615 | werr = err; | |
639 | clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); | 616 | cond_resched(); |
640 | while (start <= end) { | 617 | start = end + 1; |
641 | index = start >> PAGE_CACHE_SHIFT; | ||
642 | start = (u64)(index + 1) << PAGE_CACHE_SHIFT; | ||
643 | page = find_get_page(btree_inode->i_mapping, index); | ||
644 | if (!page) | ||
645 | continue; | ||
646 | if (PageDirty(page)) { | ||
647 | btree_lock_page_hook(page); | ||
648 | wait_on_page_writeback(page); | ||
649 | err = write_one_page(page, 0); | ||
650 | if (err) | ||
651 | werr = err; | ||
652 | } | ||
653 | wait_on_page_writeback(page); | ||
654 | page_cache_release(page); | ||
655 | cond_resched(); | ||
656 | } | ||
657 | } | 618 | } |
658 | if (err) | 619 | if (err) |
659 | werr = err; | 620 | werr = err; |
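
Both rewritten loops above walk the extent-io tree instead of scanning the page cache page by page: the write pass converts each dirty range from `mark` to EXTENT_NEED_WAIT and starts writeback with filemap_fdatawrite_range(), and the wait pass finds EXTENT_NEED_WAIT ranges, clears the tag, and waits with filemap_fdatawait_range(). A toy, runnable model of the two-phase handoff, with a small array standing in for the extent tree and printf standing in for the filemap calls:

    #include <stdio.h>

    enum { MARK = 1, NEED_WAIT = 2 };

    struct range { unsigned long long start, end; int bits; };
    static struct range tree[] = { {0, 4095, MARK}, {8192, 16383, MARK} };
    #define NRANGES (sizeof(tree) / sizeof(tree[0]))

    /* stand-in for find_first_extent_bit(): first range at or past start with bit set */
    static int find_first(unsigned long long start, unsigned long long *s,
                          unsigned long long *e, int bit)
    {
            for (unsigned i = 0; i < NRANGES; i++)
                    if (tree[i].start >= start && (tree[i].bits & bit)) {
                            *s = tree[i].start; *e = tree[i].end;
                            return 0;
                    }
            return 1;   /* nothing left */
    }

    int main(void)
    {
            unsigned long long start = 0, end;

            /* write pass: retag each dirty range, then kick off writeback */
            while (!find_first(start, &start, &end, MARK)) {
                    for (unsigned i = 0; i < NRANGES; i++)
                            if (tree[i].start == start)
                                    tree[i].bits = NEED_WAIT;        /* convert_extent_bit() */
                    printf("write %llu-%llu\n", start, end);         /* filemap_fdatawrite_range() */
                    start = end + 1;
            }

            /* wait pass: consume the NEED_WAIT tags */
            start = 0;
            while (!find_first(start, &start, &end, NEED_WAIT)) {
                    for (unsigned i = 0; i < NRANGES; i++)
                            if (tree[i].start == start)
                                    tree[i].bits = 0;                /* clear_extent_bits() */
                    printf("wait  %llu-%llu\n", start, end);         /* filemap_fdatawait_range() */
                    start = end + 1;
            }
            return 0;
    }
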
@@ -673,7 +634,12 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, | |||
673 | 634 | ||
674 | ret = btrfs_write_marked_extents(root, dirty_pages, mark); | 635 | ret = btrfs_write_marked_extents(root, dirty_pages, mark); |
675 | ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark); | 636 | ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark); |
676 | return ret || ret2; | 637 | |
638 | if (ret) | ||
639 | return ret; | ||
640 | if (ret2) | ||
641 | return ret2; | ||
642 | return 0; | ||
677 | } | 643 | } |
678 | 644 | ||
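
The old `return ret || ret2;` collapsed two error codes into the integer 1, which no caller recognizes as a -Exx value; the new form forwards the first real errno. A two-line demonstration of the difference:

    #include <stdio.h>
    #include <errno.h>

    int main(void)
    {
            int ret = -EIO, ret2 = 0;

            printf("ret || ret2      -> %d\n", ret || ret2);          /* 1: errno lost */
            printf("ret ? ret : ret2 -> %d\n", ret ? ret : ret2);     /* -EIO preserved */
            return 0;
    }
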
679 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | 645 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, |
@@ -911,10 +877,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
911 | } | 877 | } |
912 | 878 | ||
913 | btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); | 879 | btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); |
914 | btrfs_orphan_pre_snapshot(trans, pending, &to_reserve); | ||
915 | 880 | ||
916 | if (to_reserve > 0) { | 881 | if (to_reserve > 0) { |
917 | ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv, | 882 | ret = btrfs_block_rsv_add(root, &pending->block_rsv, |
918 | to_reserve); | 883 | to_reserve); |
919 | if (ret) { | 884 | if (ret) { |
920 | pending->error = ret; | 885 | pending->error = ret; |
@@ -1002,7 +967,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
1002 | BUG_ON(IS_ERR(pending->snap)); | 967 | BUG_ON(IS_ERR(pending->snap)); |
1003 | 968 | ||
1004 | btrfs_reloc_post_snapshot(trans, pending); | 969 | btrfs_reloc_post_snapshot(trans, pending); |
1005 | btrfs_orphan_post_snapshot(trans, pending); | ||
1006 | fail: | 970 | fail: |
1007 | kfree(new_root_item); | 971 | kfree(new_root_item); |
1008 | trans->block_rsv = rsv; | 972 | trans->block_rsv = rsv; |
@@ -1032,7 +996,7 @@ static void update_super_roots(struct btrfs_root *root) | |||
1032 | struct btrfs_root_item *root_item; | 996 | struct btrfs_root_item *root_item; |
1033 | struct btrfs_super_block *super; | 997 | struct btrfs_super_block *super; |
1034 | 998 | ||
1035 | super = &root->fs_info->super_copy; | 999 | super = root->fs_info->super_copy; |
1036 | 1000 | ||
1037 | root_item = &root->fs_info->chunk_root->root_item; | 1001 | root_item = &root->fs_info->chunk_root->root_item; |
1038 | super->chunk_root = root_item->bytenr; | 1002 | super->chunk_root = root_item->bytenr; |
@@ -1043,7 +1007,7 @@ static void update_super_roots(struct btrfs_root *root) | |||
1043 | super->root = root_item->bytenr; | 1007 | super->root = root_item->bytenr; |
1044 | super->generation = root_item->generation; | 1008 | super->generation = root_item->generation; |
1045 | super->root_level = root_item->level; | 1009 | super->root_level = root_item->level; |
1046 | if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE)) | 1010 | if (btrfs_test_opt(root, SPACE_CACHE)) |
1047 | super->cache_generation = root_item->generation; | 1011 | super->cache_generation = root_item->generation; |
1048 | } | 1012 | } |
1049 | 1013 | ||
@@ -1168,14 +1132,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1168 | 1132 | ||
1169 | btrfs_run_ordered_operations(root, 0); | 1133 | btrfs_run_ordered_operations(root, 0); |
1170 | 1134 | ||
1135 | btrfs_trans_release_metadata(trans, root); | ||
1136 | trans->block_rsv = NULL; | ||
1137 | |||
1171 | /* make a pass through all the delayed refs we have so far | 1138 | /* make a pass through all the delayed refs we have so far |
1172 | * any running procs may add more while we are here | 1139 | * any running procs may add more while we are here |
1173 | */ | 1140 | */ |
1174 | ret = btrfs_run_delayed_refs(trans, root, 0); | 1141 | ret = btrfs_run_delayed_refs(trans, root, 0); |
1175 | BUG_ON(ret); | 1142 | BUG_ON(ret); |
1176 | 1143 | ||
1177 | btrfs_trans_release_metadata(trans, root); | ||
1178 | |||
1179 | cur_trans = trans->transaction; | 1144 | cur_trans = trans->transaction; |
1180 | /* | 1145 | /* |
1181 | * set the flushing flag so procs in this transaction have to | 1146 | * set the flushing flag so procs in this transaction have to |
@@ -1341,12 +1306,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1341 | update_super_roots(root); | 1306 | update_super_roots(root); |
1342 | 1307 | ||
1343 | if (!root->fs_info->log_root_recovering) { | 1308 | if (!root->fs_info->log_root_recovering) { |
1344 | btrfs_set_super_log_root(&root->fs_info->super_copy, 0); | 1309 | btrfs_set_super_log_root(root->fs_info->super_copy, 0); |
1345 | btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0); | 1310 | btrfs_set_super_log_root_level(root->fs_info->super_copy, 0); |
1346 | } | 1311 | } |
1347 | 1312 | ||
1348 | memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, | 1313 | memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy, |
1349 | sizeof(root->fs_info->super_copy)); | 1314 | sizeof(*root->fs_info->super_copy)); |
1350 | 1315 | ||
1351 | trans->transaction->blocked = 0; | 1316 | trans->transaction->blocked = 0; |
1352 | spin_lock(&root->fs_info->trans_lock); | 1317 | spin_lock(&root->fs_info->trans_lock); |
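
With super_copy now a pointer instead of an embedded struct, the old `sizeof(root->fs_info->super_copy)` would silently become the size of a pointer, so the memcpy switches to `sizeof(*...)`. A standalone illustration of the pitfall:

    #include <stdio.h>

    struct super_block_copy { char payload[4096]; };

    int main(void)
    {
            struct super_block_copy *super_copy = 0;

            printf("sizeof(super_copy)  = %zu\n", sizeof(super_copy));    /* 8 on 64-bit: pointer */
            printf("sizeof(*super_copy) = %zu\n", sizeof(*super_copy));   /* 4096: the struct */
            return 0;
    }
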
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 786639fca067..f4d81c06d48f 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -276,8 +276,9 @@ static int process_one_buffer(struct btrfs_root *log, | |||
276 | struct walk_control *wc, u64 gen) | 276 | struct walk_control *wc, u64 gen) |
277 | { | 277 | { |
278 | if (wc->pin) | 278 | if (wc->pin) |
279 | btrfs_pin_extent(log->fs_info->extent_root, | 279 | btrfs_pin_extent_for_log_replay(wc->trans, |
280 | eb->start, eb->len, 0); | 280 | log->fs_info->extent_root, |
281 | eb->start, eb->len); | ||
281 | 282 | ||
282 | if (btrfs_buffer_uptodate(eb, gen)) { | 283 | if (btrfs_buffer_uptodate(eb, gen)) { |
283 | if (wc->write) | 284 | if (wc->write) |
@@ -1760,7 +1761,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
1760 | 1761 | ||
1761 | WARN_ON(root_owner != | 1762 | WARN_ON(root_owner != |
1762 | BTRFS_TREE_LOG_OBJECTID); | 1763 | BTRFS_TREE_LOG_OBJECTID); |
1763 | ret = btrfs_free_reserved_extent(root, | 1764 | ret = btrfs_free_and_pin_reserved_extent(root, |
1764 | bytenr, blocksize); | 1765 | bytenr, blocksize); |
1765 | BUG_ON(ret); | 1766 | BUG_ON(ret); |
1766 | } | 1767 | } |
@@ -1828,7 +1829,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, | |||
1828 | btrfs_tree_unlock(next); | 1829 | btrfs_tree_unlock(next); |
1829 | 1830 | ||
1830 | WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); | 1831 | WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); |
1831 | ret = btrfs_free_reserved_extent(root, | 1832 | ret = btrfs_free_and_pin_reserved_extent(root, |
1832 | path->nodes[*level]->start, | 1833 | path->nodes[*level]->start, |
1833 | path->nodes[*level]->len); | 1834 | path->nodes[*level]->len); |
1834 | BUG_ON(ret); | 1835 | BUG_ON(ret); |
@@ -1897,7 +1898,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, | |||
1897 | 1898 | ||
1898 | WARN_ON(log->root_key.objectid != | 1899 | WARN_ON(log->root_key.objectid != |
1899 | BTRFS_TREE_LOG_OBJECTID); | 1900 | BTRFS_TREE_LOG_OBJECTID); |
1900 | ret = btrfs_free_reserved_extent(log, next->start, | 1901 | ret = btrfs_free_and_pin_reserved_extent(log, next->start, |
1901 | next->len); | 1902 | next->len); |
1902 | BUG_ON(ret); | 1903 | BUG_ON(ret); |
1903 | } | 1904 | } |
@@ -2013,10 +2014,10 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2013 | /* wait for previous tree log sync to complete */ | 2014 | /* wait for previous tree log sync to complete */ |
2014 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) | 2015 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) |
2015 | wait_log_commit(trans, root, root->log_transid - 1); | 2016 | wait_log_commit(trans, root, root->log_transid - 1); |
2016 | |||
2017 | while (1) { | 2017 | while (1) { |
2018 | unsigned long batch = root->log_batch; | 2018 | unsigned long batch = root->log_batch; |
2019 | if (root->log_multiple_pids) { | 2019 | /* when we're on an ssd, just kick the log commit out */ |
2020 | if (!btrfs_test_opt(root, SSD) && root->log_multiple_pids) { | ||
2020 | mutex_unlock(&root->log_mutex); | 2021 | mutex_unlock(&root->log_mutex); |
2021 | schedule_timeout_uninterruptible(1); | 2022 | schedule_timeout_uninterruptible(1); |
2022 | mutex_lock(&root->log_mutex); | 2023 | mutex_lock(&root->log_mutex); |
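
The one-jiffy sleep in this loop exists only to batch concurrent writers into a single log commit; on an SSD the added latency buys little, so the new test skips batching when the SSD mount option is set. The decision, as a sketch with hypothetical stand-ins:

    /* hypothetical stand-ins for the mount-option test and the sleep */
    static int ssd_mount_option_set(void) { return 1; }
    static int log_multiple_pids = 1;
    static void sleep_one_jiffy(void) { }

    static void maybe_batch_log_writers(void)
    {
            /* rotational media: sleep a jiffy so concurrent writers pile
             * into one log commit; on an SSD, commit immediately */
            if (!ssd_mount_option_set() && log_multiple_pids)
                    sleep_one_jiffy();
    }
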
@@ -2117,9 +2118,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2117 | BUG_ON(ret); | 2118 | BUG_ON(ret); |
2118 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2119 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
2119 | 2120 | ||
2120 | btrfs_set_super_log_root(&root->fs_info->super_for_commit, | 2121 | btrfs_set_super_log_root(root->fs_info->super_for_commit, |
2121 | log_root_tree->node->start); | 2122 | log_root_tree->node->start); |
2122 | btrfs_set_super_log_root_level(&root->fs_info->super_for_commit, | 2123 | btrfs_set_super_log_root_level(root->fs_info->super_for_commit, |
2123 | btrfs_header_level(log_root_tree->node)); | 2124 | btrfs_header_level(log_root_tree->node)); |
2124 | 2125 | ||
2125 | log_root_tree->log_batch = 0; | 2126 | log_root_tree->log_batch = 0; |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 18baac5a3f6c..f8e2943101a1 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -366,6 +366,14 @@ static noinline int device_list_add(const char *path, | |||
366 | } | 366 | } |
367 | INIT_LIST_HEAD(&device->dev_alloc_list); | 367 | INIT_LIST_HEAD(&device->dev_alloc_list); |
368 | 368 | ||
369 | /* init readahead state */ | ||
370 | spin_lock_init(&device->reada_lock); | ||
371 | device->reada_curr_zone = NULL; | ||
372 | atomic_set(&device->reada_in_flight, 0); | ||
373 | device->reada_next = 0; | ||
374 | INIT_RADIX_TREE(&device->reada_zones, GFP_NOFS & ~__GFP_WAIT); | ||
375 | INIT_RADIX_TREE(&device->reada_extents, GFP_NOFS & ~__GFP_WAIT); | ||
376 | |||
369 | mutex_lock(&fs_devices->device_list_mutex); | 377 | mutex_lock(&fs_devices->device_list_mutex); |
370 | list_add_rcu(&device->dev_list, &fs_devices->devices); | 378 | list_add_rcu(&device->dev_list, &fs_devices->devices); |
371 | mutex_unlock(&fs_devices->device_list_mutex); | 379 | mutex_unlock(&fs_devices->device_list_mutex); |
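
Each device now carries readahead state, including two radix trees whose nodes are allocated with `GFP_NOFS & ~__GFP_WAIT`: insertions happen under spinlocks in the readahead paths, so allocations must neither sleep nor recurse into the filesystem. A small demonstration of what that mask permits, using the 3.x-era flag values purely for illustration:

    #include <stdio.h>

    /* illustrative flag values, mirroring the 3.x-era layout */
    #define __GFP_WAIT 0x10u
    #define __GFP_IO   0x40u
    #define __GFP_FS   0x80u
    #define GFP_NOFS   (__GFP_WAIT | __GFP_IO)   /* note: no __GFP_FS, no fs recursion */

    int main(void)
    {
            unsigned mask = GFP_NOFS & ~__GFP_WAIT;

            printf("may sleep: %s\n", (mask & __GFP_WAIT) ? "yes" : "no");   /* no */
            printf("may do IO: %s\n", (mask & __GFP_IO) ? "yes" : "no");     /* yes */
            return 0;
    }
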
@@ -597,10 +605,8 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
597 | set_blocksize(bdev, 4096); | 605 | set_blocksize(bdev, 4096); |
598 | 606 | ||
599 | bh = btrfs_read_dev_super(bdev); | 607 | bh = btrfs_read_dev_super(bdev); |
600 | if (!bh) { | 608 | if (!bh) |
601 | ret = -EINVAL; | ||
602 | goto error_close; | 609 | goto error_close; |
603 | } | ||
604 | 610 | ||
605 | disk_super = (struct btrfs_super_block *)bh->b_data; | 611 | disk_super = (struct btrfs_super_block *)bh->b_data; |
606 | devid = btrfs_stack_device_id(&disk_super->dev_item); | 612 | devid = btrfs_stack_device_id(&disk_super->dev_item); |
@@ -655,7 +661,7 @@ error: | |||
655 | continue; | 661 | continue; |
656 | } | 662 | } |
657 | if (fs_devices->open_devices == 0) { | 663 | if (fs_devices->open_devices == 0) { |
658 | ret = -EIO; | 664 | ret = -EINVAL; |
659 | goto out; | 665 | goto out; |
660 | } | 666 | } |
661 | fs_devices->seeding = seeding; | 667 | fs_devices->seeding = seeding; |
@@ -1013,8 +1019,13 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, | |||
1013 | } | 1019 | } |
1014 | BUG_ON(ret); | 1020 | BUG_ON(ret); |
1015 | 1021 | ||
1016 | if (device->bytes_used > 0) | 1022 | if (device->bytes_used > 0) { |
1017 | device->bytes_used -= btrfs_dev_extent_length(leaf, extent); | 1023 | u64 len = btrfs_dev_extent_length(leaf, extent); |
1024 | device->bytes_used -= len; | ||
1025 | spin_lock(&root->fs_info->free_chunk_lock); | ||
1026 | root->fs_info->free_chunk_space += len; | ||
1027 | spin_unlock(&root->fs_info->free_chunk_lock); | ||
1028 | } | ||
1018 | ret = btrfs_del_item(trans, root, path); | 1029 | ret = btrfs_del_item(trans, root, path); |
1019 | 1030 | ||
1020 | out: | 1031 | out: |
@@ -1356,6 +1367,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1356 | if (ret) | 1367 | if (ret) |
1357 | goto error_undo; | 1368 | goto error_undo; |
1358 | 1369 | ||
1370 | spin_lock(&root->fs_info->free_chunk_lock); | ||
1371 | root->fs_info->free_chunk_space = device->total_bytes - | ||
1372 | device->bytes_used; | ||
1373 | spin_unlock(&root->fs_info->free_chunk_lock); | ||
1374 | |||
1359 | device->in_fs_metadata = 0; | 1375 | device->in_fs_metadata = 0; |
1360 | btrfs_scrub_cancel_dev(root, device); | 1376 | btrfs_scrub_cancel_dev(root, device); |
1361 | 1377 | ||
@@ -1387,8 +1403,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1387 | call_rcu(&device->rcu, free_device); | 1403 | call_rcu(&device->rcu, free_device); |
1388 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 1404 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); |
1389 | 1405 | ||
1390 | num_devices = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; | 1406 | num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1; |
1391 | btrfs_set_super_num_devices(&root->fs_info->super_copy, num_devices); | 1407 | btrfs_set_super_num_devices(root->fs_info->super_copy, num_devices); |
1392 | 1408 | ||
1393 | if (cur_devices->open_devices == 0) { | 1409 | if (cur_devices->open_devices == 0) { |
1394 | struct btrfs_fs_devices *fs_devices; | 1410 | struct btrfs_fs_devices *fs_devices; |
@@ -1450,7 +1466,7 @@ static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans, | |||
1450 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; | 1466 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; |
1451 | struct btrfs_fs_devices *old_devices; | 1467 | struct btrfs_fs_devices *old_devices; |
1452 | struct btrfs_fs_devices *seed_devices; | 1468 | struct btrfs_fs_devices *seed_devices; |
1453 | struct btrfs_super_block *disk_super = &root->fs_info->super_copy; | 1469 | struct btrfs_super_block *disk_super = root->fs_info->super_copy; |
1454 | struct btrfs_device *device; | 1470 | struct btrfs_device *device; |
1455 | u64 super_flags; | 1471 | u64 super_flags; |
1456 | 1472 | ||
@@ -1691,15 +1707,19 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1691 | root->fs_info->fs_devices->num_can_discard++; | 1707 | root->fs_info->fs_devices->num_can_discard++; |
1692 | root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; | 1708 | root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; |
1693 | 1709 | ||
1710 | spin_lock(&root->fs_info->free_chunk_lock); | ||
1711 | root->fs_info->free_chunk_space += device->total_bytes; | ||
1712 | spin_unlock(&root->fs_info->free_chunk_lock); | ||
1713 | |||
1694 | if (!blk_queue_nonrot(bdev_get_queue(bdev))) | 1714 | if (!blk_queue_nonrot(bdev_get_queue(bdev))) |
1695 | root->fs_info->fs_devices->rotating = 1; | 1715 | root->fs_info->fs_devices->rotating = 1; |
1696 | 1716 | ||
1697 | total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); | 1717 | total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy); |
1698 | btrfs_set_super_total_bytes(&root->fs_info->super_copy, | 1718 | btrfs_set_super_total_bytes(root->fs_info->super_copy, |
1699 | total_bytes + device->total_bytes); | 1719 | total_bytes + device->total_bytes); |
1700 | 1720 | ||
1701 | total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy); | 1721 | total_bytes = btrfs_super_num_devices(root->fs_info->super_copy); |
1702 | btrfs_set_super_num_devices(&root->fs_info->super_copy, | 1722 | btrfs_set_super_num_devices(root->fs_info->super_copy, |
1703 | total_bytes + 1); | 1723 | total_bytes + 1); |
1704 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 1724 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); |
1705 | 1725 | ||
@@ -1790,7 +1810,7 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans, | |||
1790 | struct btrfs_device *device, u64 new_size) | 1810 | struct btrfs_device *device, u64 new_size) |
1791 | { | 1811 | { |
1792 | struct btrfs_super_block *super_copy = | 1812 | struct btrfs_super_block *super_copy = |
1793 | &device->dev_root->fs_info->super_copy; | 1813 | device->dev_root->fs_info->super_copy; |
1794 | u64 old_total = btrfs_super_total_bytes(super_copy); | 1814 | u64 old_total = btrfs_super_total_bytes(super_copy); |
1795 | u64 diff = new_size - device->total_bytes; | 1815 | u64 diff = new_size - device->total_bytes; |
1796 | 1816 | ||
@@ -1849,7 +1869,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, | |||
1849 | static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 | 1869 | static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 |
1850 | chunk_offset) | 1870 | chunk_offset) |
1851 | { | 1871 | { |
1852 | struct btrfs_super_block *super_copy = &root->fs_info->super_copy; | 1872 | struct btrfs_super_block *super_copy = root->fs_info->super_copy; |
1853 | struct btrfs_disk_key *disk_key; | 1873 | struct btrfs_disk_key *disk_key; |
1854 | struct btrfs_chunk *chunk; | 1874 | struct btrfs_chunk *chunk; |
1855 | u8 *ptr; | 1875 | u8 *ptr; |
@@ -2175,7 +2195,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
2175 | bool retried = false; | 2195 | bool retried = false; |
2176 | struct extent_buffer *l; | 2196 | struct extent_buffer *l; |
2177 | struct btrfs_key key; | 2197 | struct btrfs_key key; |
2178 | struct btrfs_super_block *super_copy = &root->fs_info->super_copy; | 2198 | struct btrfs_super_block *super_copy = root->fs_info->super_copy; |
2179 | u64 old_total = btrfs_super_total_bytes(super_copy); | 2199 | u64 old_total = btrfs_super_total_bytes(super_copy); |
2180 | u64 old_size = device->total_bytes; | 2200 | u64 old_size = device->total_bytes; |
2181 | u64 diff = device->total_bytes - new_size; | 2201 | u64 diff = device->total_bytes - new_size; |
@@ -2192,8 +2212,12 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
2192 | lock_chunks(root); | 2212 | lock_chunks(root); |
2193 | 2213 | ||
2194 | device->total_bytes = new_size; | 2214 | device->total_bytes = new_size; |
2195 | if (device->writeable) | 2215 | if (device->writeable) { |
2196 | device->fs_devices->total_rw_bytes -= diff; | 2216 | device->fs_devices->total_rw_bytes -= diff; |
2217 | spin_lock(&root->fs_info->free_chunk_lock); | ||
2218 | root->fs_info->free_chunk_space -= diff; | ||
2219 | spin_unlock(&root->fs_info->free_chunk_lock); | ||
2220 | } | ||
2197 | unlock_chunks(root); | 2221 | unlock_chunks(root); |
2198 | 2222 | ||
2199 | again: | 2223 | again: |
@@ -2257,6 +2281,9 @@ again: | |||
2257 | device->total_bytes = old_size; | 2281 | device->total_bytes = old_size; |
2258 | if (device->writeable) | 2282 | if (device->writeable) |
2259 | device->fs_devices->total_rw_bytes += diff; | 2283 | device->fs_devices->total_rw_bytes += diff; |
2284 | spin_lock(&root->fs_info->free_chunk_lock); | ||
2285 | root->fs_info->free_chunk_space += diff; | ||
2286 | spin_unlock(&root->fs_info->free_chunk_lock); | ||
2260 | unlock_chunks(root); | 2287 | unlock_chunks(root); |
2261 | goto done; | 2288 | goto done; |
2262 | } | 2289 | } |
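
free_chunk_space is a new fs-wide count of bytes on writable devices not yet allocated to any chunk, always adjusted under free_chunk_lock: it grows when a device is added or a dev extent is freed, shrinks on device shrink and on chunk allocation (stripe_size times num_stripes), and is re-credited when a shrink is backed out. A toy model of the counter, with a pthread mutex standing in for the spinlock:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t free_chunk_lock = PTHREAD_MUTEX_INITIALIZER;
    static long long free_chunk_space;   /* bytes on writable devices not yet in any chunk */

    static void adjust_free_chunk_space(long long delta)
    {
            pthread_mutex_lock(&free_chunk_lock);
            free_chunk_space += delta;
            pthread_mutex_unlock(&free_chunk_lock);
    }

    int main(void)
    {
            adjust_free_chunk_space(100LL << 20);         /* device added: +100 MiB */
            adjust_free_chunk_space(-(10LL << 20) * 2);   /* chunk allocated: 2 stripes x 10 MiB */
            printf("unallocated: %lld MiB\n", free_chunk_space >> 20);   /* 80 */
            return 0;
    }
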
@@ -2292,7 +2319,7 @@ static int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, | |||
2292 | struct btrfs_key *key, | 2319 | struct btrfs_key *key, |
2293 | struct btrfs_chunk *chunk, int item_size) | 2320 | struct btrfs_chunk *chunk, int item_size) |
2294 | { | 2321 | { |
2295 | struct btrfs_super_block *super_copy = &root->fs_info->super_copy; | 2322 | struct btrfs_super_block *super_copy = root->fs_info->super_copy; |
2296 | struct btrfs_disk_key disk_key; | 2323 | struct btrfs_disk_key disk_key; |
2297 | u32 array_size; | 2324 | u32 array_size; |
2298 | u8 *ptr; | 2325 | u8 *ptr; |
@@ -2615,6 +2642,11 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, | |||
2615 | index++; | 2642 | index++; |
2616 | } | 2643 | } |
2617 | 2644 | ||
2645 | spin_lock(&extent_root->fs_info->free_chunk_lock); | ||
2646 | extent_root->fs_info->free_chunk_space -= (stripe_size * | ||
2647 | map->num_stripes); | ||
2648 | spin_unlock(&extent_root->fs_info->free_chunk_lock); | ||
2649 | |||
2618 | index = 0; | 2650 | index = 0; |
2619 | stripe = &chunk->stripe; | 2651 | stripe = &chunk->stripe; |
2620 | while (index < map->num_stripes) { | 2652 | while (index < map->num_stripes) { |
@@ -3626,15 +3658,20 @@ static int read_one_dev(struct btrfs_root *root, | |||
3626 | fill_device_from_item(leaf, dev_item, device); | 3658 | fill_device_from_item(leaf, dev_item, device); |
3627 | device->dev_root = root->fs_info->dev_root; | 3659 | device->dev_root = root->fs_info->dev_root; |
3628 | device->in_fs_metadata = 1; | 3660 | device->in_fs_metadata = 1; |
3629 | if (device->writeable) | 3661 | if (device->writeable) { |
3630 | device->fs_devices->total_rw_bytes += device->total_bytes; | 3662 | device->fs_devices->total_rw_bytes += device->total_bytes; |
3663 | spin_lock(&root->fs_info->free_chunk_lock); | ||
3664 | root->fs_info->free_chunk_space += device->total_bytes - | ||
3665 | device->bytes_used; | ||
3666 | spin_unlock(&root->fs_info->free_chunk_lock); | ||
3667 | } | ||
3631 | ret = 0; | 3668 | ret = 0; |
3632 | return ret; | 3669 | return ret; |
3633 | } | 3670 | } |
3634 | 3671 | ||
3635 | int btrfs_read_sys_array(struct btrfs_root *root) | 3672 | int btrfs_read_sys_array(struct btrfs_root *root) |
3636 | { | 3673 | { |
3637 | struct btrfs_super_block *super_copy = &root->fs_info->super_copy; | 3674 | struct btrfs_super_block *super_copy = root->fs_info->super_copy; |
3638 | struct extent_buffer *sb; | 3675 | struct extent_buffer *sb; |
3639 | struct btrfs_disk_key *disk_key; | 3676 | struct btrfs_disk_key *disk_key; |
3640 | struct btrfs_chunk *chunk; | 3677 | struct btrfs_chunk *chunk; |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 71f4f3f67495..ab5b1c49f352 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -92,6 +92,14 @@ struct btrfs_device { | |||
92 | struct btrfs_work work; | 92 | struct btrfs_work work; |
93 | struct rcu_head rcu; | 93 | struct rcu_head rcu; |
94 | struct work_struct rcu_work; | 94 | struct work_struct rcu_work; |
95 | |||
96 | /* readahead state */ | ||
97 | spinlock_t reada_lock; | ||
98 | atomic_t reada_in_flight; | ||
99 | u64 reada_next; | ||
100 | struct reada_zone *reada_curr_zone; | ||
101 | struct radix_tree_root reada_zones; | ||
102 | struct radix_tree_root reada_extents; | ||
95 | }; | 103 | }; |
96 | 104 | ||
97 | struct btrfs_fs_devices { | 105 | struct btrfs_fs_devices { |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 69565e5fc6a0..a76e41c04b71 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -127,6 +127,17 @@ static int do_setxattr(struct btrfs_trans_handle *trans, | |||
127 | again: | 127 | again: |
128 | ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode), | 128 | ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode), |
129 | name, name_len, value, size); | 129 | name, name_len, value, size); |
130 | /* | ||
131 | * If we're setting an xattr to a new value but the new value is say | ||
132 | * exactly BTRFS_MAX_XATTR_SIZE, we could end up with EOVERFLOW getting | ||
133 | * back from split_leaf. This is because it thinks we'll be extending | ||
134 | * the existing item size, but we're asking for enough space to add the | ||
135 | * item itself. So if we get EOVERFLOW just set ret to EEXIST and let | ||
136 | * the rest of the function figure it out. | ||
137 | */ | ||
138 | if (ret == -EOVERFLOW) | ||
139 | ret = -EEXIST; | ||
140 | |||
130 | if (ret == -EEXIST) { | 141 | if (ret == -EEXIST) { |
131 | if (flags & XATTR_CREATE) | 142 | if (flags & XATTR_CREATE) |
132 | goto out; | 143 | goto out; |
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index e76bfeb68267..30acd22147e1 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c | |||
@@ -351,9 +351,7 @@ static int | |||
351 | build_avpair_blob(struct cifs_ses *ses, const struct nls_table *nls_cp) | 351 | build_avpair_blob(struct cifs_ses *ses, const struct nls_table *nls_cp) |
352 | { | 352 | { |
353 | unsigned int dlen; | 353 | unsigned int dlen; |
354 | unsigned int wlen; | 354 | unsigned int size = 2 * sizeof(struct ntlmssp2_name); |
355 | unsigned int size = 6 * sizeof(struct ntlmssp2_name); | ||
356 | __le64 curtime; | ||
357 | char *defdmname = "WORKGROUP"; | 355 | char *defdmname = "WORKGROUP"; |
358 | unsigned char *blobptr; | 356 | unsigned char *blobptr; |
359 | struct ntlmssp2_name *attrptr; | 357 | struct ntlmssp2_name *attrptr; |
@@ -365,15 +363,14 @@ build_avpair_blob(struct cifs_ses *ses, const struct nls_table *nls_cp) | |||
365 | } | 363 | } |
366 | 364 | ||
367 | dlen = strlen(ses->domainName); | 365 | dlen = strlen(ses->domainName); |
368 | wlen = strlen(ses->server->hostname); | ||
369 | 366 | ||
370 | /* The length of this blob is a size which is | 367 | /* |
371 | * six times the size of a structure which holds name/size + | 368 | * The length of this blob is two times the size of a |
372 | * two times the unicode length of a domain name + | 369 | * structure (av pair) which holds name/size |
373 | * two times the unicode length of a server name + | 370 | * ( for NTLMSSP_AV_NB_DOMAIN_NAME followed by NTLMSSP_AV_EOL ) + |
374 | * size of a timestamp (which is 8 bytes). | 371 | * unicode length of a netbios domain name |
375 | */ | 372 | */ |
376 | ses->auth_key.len = size + 2 * (2 * dlen) + 2 * (2 * wlen) + 8; | 373 | ses->auth_key.len = size + 2 * dlen; |
377 | ses->auth_key.response = kzalloc(ses->auth_key.len, GFP_KERNEL); | 374 | ses->auth_key.response = kzalloc(ses->auth_key.len, GFP_KERNEL); |
378 | if (!ses->auth_key.response) { | 375 | if (!ses->auth_key.response) { |
379 | ses->auth_key.len = 0; | 376 | ses->auth_key.len = 0; |
@@ -384,44 +381,15 @@ build_avpair_blob(struct cifs_ses *ses, const struct nls_table *nls_cp) | |||
384 | blobptr = ses->auth_key.response; | 381 | blobptr = ses->auth_key.response; |
385 | attrptr = (struct ntlmssp2_name *) blobptr; | 382 | attrptr = (struct ntlmssp2_name *) blobptr; |
386 | 383 | ||
384 | /* | ||
385 | * As defined in MS-NTLM 3.3.2, just this av pair field | ||
386 | * is sufficient as part of the temp | ||
387 | */ | ||
387 | attrptr->type = cpu_to_le16(NTLMSSP_AV_NB_DOMAIN_NAME); | 388 | attrptr->type = cpu_to_le16(NTLMSSP_AV_NB_DOMAIN_NAME); |
388 | attrptr->length = cpu_to_le16(2 * dlen); | 389 | attrptr->length = cpu_to_le16(2 * dlen); |
389 | blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name); | 390 | blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name); |
390 | cifs_strtoUCS((__le16 *)blobptr, ses->domainName, dlen, nls_cp); | 391 | cifs_strtoUCS((__le16 *)blobptr, ses->domainName, dlen, nls_cp); |
391 | 392 | ||
392 | blobptr += 2 * dlen; | ||
393 | attrptr = (struct ntlmssp2_name *) blobptr; | ||
394 | |||
395 | attrptr->type = cpu_to_le16(NTLMSSP_AV_NB_COMPUTER_NAME); | ||
396 | attrptr->length = cpu_to_le16(2 * wlen); | ||
397 | blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name); | ||
398 | cifs_strtoUCS((__le16 *)blobptr, ses->server->hostname, wlen, nls_cp); | ||
399 | |||
400 | blobptr += 2 * wlen; | ||
401 | attrptr = (struct ntlmssp2_name *) blobptr; | ||
402 | |||
403 | attrptr->type = cpu_to_le16(NTLMSSP_AV_DNS_DOMAIN_NAME); | ||
404 | attrptr->length = cpu_to_le16(2 * dlen); | ||
405 | blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name); | ||
406 | cifs_strtoUCS((__le16 *)blobptr, ses->domainName, dlen, nls_cp); | ||
407 | |||
408 | blobptr += 2 * dlen; | ||
409 | attrptr = (struct ntlmssp2_name *) blobptr; | ||
410 | |||
411 | attrptr->type = cpu_to_le16(NTLMSSP_AV_DNS_COMPUTER_NAME); | ||
412 | attrptr->length = cpu_to_le16(2 * wlen); | ||
413 | blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name); | ||
414 | cifs_strtoUCS((__le16 *)blobptr, ses->server->hostname, wlen, nls_cp); | ||
415 | |||
416 | blobptr += 2 * wlen; | ||
417 | attrptr = (struct ntlmssp2_name *) blobptr; | ||
418 | |||
419 | attrptr->type = cpu_to_le16(NTLMSSP_AV_TIMESTAMP); | ||
420 | attrptr->length = cpu_to_le16(sizeof(__le64)); | ||
421 | blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name); | ||
422 | curtime = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); | ||
423 | memcpy(blobptr, &curtime, sizeof(__le64)); | ||
424 | |||
425 | return 0; | 393 | return 0; |
426 | } | 394 | } |
427 | 395 | ||
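
After the trim, the target-info blob carries a single NTLMSSP_AV_NB_DOMAIN_NAME attribute/value pair followed by the terminating NTLMSSP_AV_EOL pair, so its length is two AV-pair headers plus the UTF-16 domain name at 2 bytes per character. A worked size computation with a stand-in header struct:

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    /* stand-in for struct ntlmssp2_name: a 4-byte AV-pair header */
    struct av_hdr { uint16_t type; uint16_t length; };

    int main(void)
    {
            const char *domain = "WORKGROUP";
            size_t dlen = strlen(domain);

            /* AV_NB_DOMAIN_NAME header + UTF-16 name + AV_EOL header */
            size_t blob_len = 2 * sizeof(struct av_hdr) + 2 * dlen;

            printf("blob length = %zu bytes\n", blob_len);   /* 4 + 18 + 4 = 26 */
            return 0;
    }
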
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index f93eb948d071..54b8f1e7da94 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -548,6 +548,12 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) | |||
548 | struct inode *dir = dentry->d_inode; | 548 | struct inode *dir = dentry->d_inode; |
549 | struct dentry *child; | 549 | struct dentry *child; |
550 | 550 | ||
551 | if (!dir) { | ||
552 | dput(dentry); | ||
553 | dentry = ERR_PTR(-ENOENT); | ||
554 | break; | ||
555 | } | ||
556 | |||
551 | /* skip separators */ | 557 | /* skip separators */ |
552 | while (*s == sep) | 558 | while (*s == sep) |
553 | s++; | 559 | s++; |
@@ -563,10 +569,6 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) | |||
563 | mutex_unlock(&dir->i_mutex); | 569 | mutex_unlock(&dir->i_mutex); |
564 | dput(dentry); | 570 | dput(dentry); |
565 | dentry = child; | 571 | dentry = child; |
566 | if (!dentry->d_inode) { | ||
567 | dput(dentry); | ||
568 | dentry = ERR_PTR(-ENOENT); | ||
569 | } | ||
570 | } while (!IS_ERR(dentry)); | 572 | } while (!IS_ERR(dentry)); |
571 | _FreeXid(xid); | 573 | _FreeXid(xid); |
572 | kfree(full_path); | 574 | kfree(full_path); |
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index aac37d99a487..a80f7bd97b90 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c | |||
@@ -4079,7 +4079,8 @@ int CIFSFindNext(const int xid, struct cifs_tcon *tcon, | |||
4079 | T2_FNEXT_RSP_PARMS *parms; | 4079 | T2_FNEXT_RSP_PARMS *parms; |
4080 | char *response_data; | 4080 | char *response_data; |
4081 | int rc = 0; | 4081 | int rc = 0; |
4082 | int bytes_returned, name_len; | 4082 | int bytes_returned; |
4083 | unsigned int name_len; | ||
4083 | __u16 params, byte_count; | 4084 | __u16 params, byte_count; |
4084 | 4085 | ||
4085 | cFYI(1, "In FindNext"); | 4086 | cFYI(1, "In FindNext"); |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 633c246b6775..71beb0201970 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -1298,7 +1298,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
1298 | /* ignore */ | 1298 | /* ignore */ |
1299 | } else if (strnicmp(data, "guest", 5) == 0) { | 1299 | } else if (strnicmp(data, "guest", 5) == 0) { |
1300 | /* ignore */ | 1300 | /* ignore */ |
1301 | } else if (strnicmp(data, "rw", 2) == 0) { | 1301 | } else if (strnicmp(data, "rw", 2) == 0 && strlen(data) == 2) { |
1302 | /* ignore */ | 1302 | /* ignore */ |
1303 | } else if (strnicmp(data, "ro", 2) == 0) { | 1303 | } else if (strnicmp(data, "ro", 2) == 0) { |
1304 | /* ignore */ | 1304 | /* ignore */ |
@@ -1401,7 +1401,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
1401 | vol->server_ino = 1; | 1401 | vol->server_ino = 1; |
1402 | } else if (strnicmp(data, "noserverino", 9) == 0) { | 1402 | } else if (strnicmp(data, "noserverino", 9) == 0) { |
1403 | vol->server_ino = 0; | 1403 | vol->server_ino = 0; |
1404 | } else if (strnicmp(data, "rwpidforward", 4) == 0) { | 1404 | } else if (strnicmp(data, "rwpidforward", 12) == 0) { |
1405 | vol->rwpidforward = 1; | 1405 | vol->rwpidforward = 1; |
1406 | } else if (strnicmp(data, "cifsacl", 7) == 0) { | 1406 | } else if (strnicmp(data, "cifsacl", 7) == 0) { |
1407 | vol->cifs_acl = 1; | 1407 | vol->cifs_acl = 1; |
@@ -2018,7 +2018,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) | |||
2018 | warned_on_ntlm = true; | 2018 | warned_on_ntlm = true; |
2019 | cERROR(1, "default security mechanism requested. The default " | 2019 | cERROR(1, "default security mechanism requested. The default " |
2020 | "security mechanism will be upgraded from ntlm to " | 2020 | "security mechanism will be upgraded from ntlm to " |
2021 | "ntlmv2 in kernel release 3.1"); | 2021 | "ntlmv2 in kernel release 3.2"); |
2022 | } | 2022 | } |
2023 | ses->overrideSecFlg = volume_info->secFlg; | 2023 | ses->overrideSecFlg = volume_info->secFlg; |
2024 | 2024 | ||
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 04da6acde85d..12661e1deedd 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -1134,7 +1134,7 @@ struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode, | |||
1134 | return bh; | 1134 | return bh; |
1135 | if (buffer_uptodate(bh)) | 1135 | if (buffer_uptodate(bh)) |
1136 | return bh; | 1136 | return bh; |
1137 | ll_rw_block(READ_META, 1, &bh); | 1137 | ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); |
1138 | wait_on_buffer(bh); | 1138 | wait_on_buffer(bh); |
1139 | if (buffer_uptodate(bh)) | 1139 | if (buffer_uptodate(bh)) |
1140 | return bh; | 1140 | return bh; |
@@ -2807,7 +2807,7 @@ make_io: | |||
2807 | trace_ext3_load_inode(inode); | 2807 | trace_ext3_load_inode(inode); |
2808 | get_bh(bh); | 2808 | get_bh(bh); |
2809 | bh->b_end_io = end_buffer_read_sync; | 2809 | bh->b_end_io = end_buffer_read_sync; |
2810 | submit_bh(READ_META, bh); | 2810 | submit_bh(READ | REQ_META | REQ_PRIO, bh); |
2811 | wait_on_buffer(bh); | 2811 | wait_on_buffer(bh); |
2812 | if (!buffer_uptodate(bh)) { | 2812 | if (!buffer_uptodate(bh)) { |
2813 | ext3_error(inode->i_sb, "ext3_get_inode_loc", | 2813 | ext3_error(inode->i_sb, "ext3_get_inode_loc", |
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 5571708b6a58..0629e09f6511 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c | |||
@@ -922,7 +922,8 @@ restart: | |||
922 | bh = ext3_getblk(NULL, dir, b++, 0, &err); | 922 | bh = ext3_getblk(NULL, dir, b++, 0, &err); |
923 | bh_use[ra_max] = bh; | 923 | bh_use[ra_max] = bh; |
924 | if (bh) | 924 | if (bh) |
925 | ll_rw_block(READ_META, 1, &bh); | 925 | ll_rw_block(READ | REQ_META | REQ_PRIO, |
926 | 1, &bh); | ||
926 | } | 927 | } |
927 | } | 928 | } |
928 | if ((bh = bh_use[ra_ptr++]) == NULL) | 929 | if ((bh = bh_use[ra_ptr++]) == NULL) |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 18d2558b7624..986e2388f031 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -647,7 +647,7 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, | |||
647 | return bh; | 647 | return bh; |
648 | if (buffer_uptodate(bh)) | 648 | if (buffer_uptodate(bh)) |
649 | return bh; | 649 | return bh; |
650 | ll_rw_block(READ_META, 1, &bh); | 650 | ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); |
651 | wait_on_buffer(bh); | 651 | wait_on_buffer(bh); |
652 | if (buffer_uptodate(bh)) | 652 | if (buffer_uptodate(bh)) |
653 | return bh; | 653 | return bh; |
@@ -3298,7 +3298,7 @@ make_io: | |||
3298 | trace_ext4_load_inode(inode); | 3298 | trace_ext4_load_inode(inode); |
3299 | get_bh(bh); | 3299 | get_bh(bh); |
3300 | bh->b_end_io = end_buffer_read_sync; | 3300 | bh->b_end_io = end_buffer_read_sync; |
3301 | submit_bh(READ_META, bh); | 3301 | submit_bh(READ | REQ_META | REQ_PRIO, bh); |
3302 | wait_on_buffer(bh); | 3302 | wait_on_buffer(bh); |
3303 | if (!buffer_uptodate(bh)) { | 3303 | if (!buffer_uptodate(bh)) { |
3304 | EXT4_ERROR_INODE_BLOCK(inode, block, | 3304 | EXT4_ERROR_INODE_BLOCK(inode, block, |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index f8068c7bae9f..1c924faeb6c8 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -922,7 +922,8 @@ restart: | |||
922 | bh = ext4_getblk(NULL, dir, b++, 0, &err); | 922 | bh = ext4_getblk(NULL, dir, b++, 0, &err); |
923 | bh_use[ra_max] = bh; | 923 | bh_use[ra_max] = bh; |
924 | if (bh) | 924 | if (bh) |
925 | ll_rw_block(READ_META, 1, &bh); | 925 | ll_rw_block(READ | REQ_META | REQ_PRIO, |
926 | 1, &bh); | ||
926 | } | 927 | } |
927 | } | 928 | } |
928 | if ((bh = bh_use[ra_ptr++]) == NULL) | 929 | if ((bh = bh_use[ra_ptr++]) == NULL) |
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 85c62923ee29..598646434362 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
@@ -624,9 +624,9 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) | |||
624 | bh->b_end_io = end_buffer_write_sync; | 624 | bh->b_end_io = end_buffer_write_sync; |
625 | get_bh(bh); | 625 | get_bh(bh); |
626 | if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) | 626 | if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) |
627 | submit_bh(WRITE_SYNC | REQ_META, bh); | 627 | submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh); |
628 | else | 628 | else |
629 | submit_bh(WRITE_FLUSH_FUA | REQ_META, bh); | 629 | submit_bh(WRITE_FLUSH_FUA | REQ_META | REQ_PRIO, bh); |
630 | wait_on_buffer(bh); | 630 | wait_on_buffer(bh); |
631 | 631 | ||
632 | if (!buffer_uptodate(bh)) | 632 | if (!buffer_uptodate(bh)) |
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 747238cd9f96..be29858900f6 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c | |||
@@ -37,7 +37,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb | |||
37 | { | 37 | { |
38 | struct buffer_head *bh, *head; | 38 | struct buffer_head *bh, *head; |
39 | int nr_underway = 0; | 39 | int nr_underway = 0; |
40 | int write_op = REQ_META | | 40 | int write_op = REQ_META | REQ_PRIO | |
41 | (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); | 41 | (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); |
42 | 42 | ||
43 | BUG_ON(!PageLocked(page)); | 43 | BUG_ON(!PageLocked(page)); |
@@ -225,7 +225,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, | |||
225 | } | 225 | } |
226 | bh->b_end_io = end_buffer_read_sync; | 226 | bh->b_end_io = end_buffer_read_sync; |
227 | get_bh(bh); | 227 | get_bh(bh); |
228 | submit_bh(READ_SYNC | REQ_META, bh); | 228 | submit_bh(READ_SYNC | REQ_META | REQ_PRIO, bh); |
229 | if (!(flags & DIO_WAIT)) | 229 | if (!(flags & DIO_WAIT)) |
230 | return 0; | 230 | return 0; |
231 | 231 | ||
@@ -435,7 +435,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) | |||
435 | if (buffer_uptodate(first_bh)) | 435 | if (buffer_uptodate(first_bh)) |
436 | goto out; | 436 | goto out; |
437 | if (!buffer_locked(first_bh)) | 437 | if (!buffer_locked(first_bh)) |
438 | ll_rw_block(READ_SYNC | REQ_META, 1, &first_bh); | 438 | ll_rw_block(READ_SYNC | REQ_META | REQ_PRIO, 1, &first_bh); |
439 | 439 | ||
440 | dblock++; | 440 | dblock++; |
441 | extlen--; | 441 | extlen--; |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 3bc073a4cf82..079587e53849 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -224,7 +224,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent) | |||
224 | 224 | ||
225 | bio->bi_end_io = end_bio_io_page; | 225 | bio->bi_end_io = end_bio_io_page; |
226 | bio->bi_private = page; | 226 | bio->bi_private = page; |
227 | submit_bio(READ_SYNC | REQ_META, bio); | 227 | submit_bio(READ_SYNC | REQ_META | REQ_PRIO, bio); |
228 | wait_on_page_locked(page); | 228 | wait_on_page_locked(page); |
229 | bio_put(bio); | 229 | bio_put(bio); |
230 | if (!PageUptodate(page)) { | 230 | if (!PageUptodate(page)) { |
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 42e8d23bc047..0e8bb13381e4 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
@@ -709,7 +709,7 @@ get_a_page: | |||
709 | set_buffer_uptodate(bh); | 709 | set_buffer_uptodate(bh); |
710 | 710 | ||
711 | if (!buffer_uptodate(bh)) { | 711 | if (!buffer_uptodate(bh)) { |
712 | ll_rw_block(READ_META, 1, &bh); | 712 | ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); |
713 | wait_on_buffer(bh); | 713 | wait_on_buffer(bh); |
714 | if (!buffer_uptodate(bh)) | 714 | if (!buffer_uptodate(bh)) |
715 | goto unlock_out; | 715 | goto unlock_out; |
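
Every metadata read and log write in the ext3, ext4, and gfs2 hunks above moves from the old READ_META shorthand to an explicit `READ | REQ_META | REQ_PRIO`, letting the block layer both account the request as metadata and boost its priority in the IO scheduler. The pattern as a sketch, with illustrative flag bits and a stand-in for submit_bh():

    /* illustrative request flags, not the kernel's actual bit values */
    #define READ      0x0u
    #define REQ_META  (1u << 0)   /* metadata request, for accounting */
    #define REQ_PRIO  (1u << 1)   /* boost priority in the IO scheduler */

    /* hypothetical stand-in for submit_bh(rw, bh) */
    static void submit(unsigned rw) { (void)rw; }

    static void read_metadata_block(void)
    {
            /* metadata reads stall the caller, so mark and prioritize them */
            submit(READ | REQ_META | REQ_PRIO);
    }
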
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index c106ca22e812..d24a9b666a23 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c | |||
@@ -344,6 +344,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
344 | struct inode *root, *inode; | 344 | struct inode *root, *inode; |
345 | struct qstr str; | 345 | struct qstr str; |
346 | struct nls_table *nls = NULL; | 346 | struct nls_table *nls = NULL; |
347 | u64 last_fs_block, last_fs_page; | ||
347 | int err; | 348 | int err; |
348 | 349 | ||
349 | err = -EINVAL; | 350 | err = -EINVAL; |
@@ -399,9 +400,13 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
399 | if (!sbi->rsrc_clump_blocks) | 400 | if (!sbi->rsrc_clump_blocks) |
400 | sbi->rsrc_clump_blocks = 1; | 401 | sbi->rsrc_clump_blocks = 1; |
401 | 402 | ||
402 | err = generic_check_addressable(sbi->alloc_blksz_shift, | 403 | err = -EFBIG; |
403 | sbi->total_blocks); | 404 | last_fs_block = sbi->total_blocks - 1; |
404 | if (err) { | 405 | last_fs_page = (last_fs_block << sbi->alloc_blksz_shift) >> |
406 | PAGE_CACHE_SHIFT; | ||
407 | |||
408 | if ((last_fs_block > (sector_t)(~0ULL) >> (sbi->alloc_blksz_shift - 9)) || | ||
409 | (last_fs_page > (pgoff_t)(~0ULL))) { | ||
405 | printk(KERN_ERR "hfs: filesystem size too large.\n"); | 410 | printk(KERN_ERR "hfs: filesystem size too large.\n"); |
406 | goto out_free_vhdr; | 411 | goto out_free_vhdr; |
407 | } | 412 | } |
@@ -525,8 +530,8 @@ out_close_cat_tree: | |||
525 | out_close_ext_tree: | 530 | out_close_ext_tree: |
526 | hfs_btree_close(sbi->ext_tree); | 531 | hfs_btree_close(sbi->ext_tree); |
527 | out_free_vhdr: | 532 | out_free_vhdr: |
528 | kfree(sbi->s_vhdr); | 533 | kfree(sbi->s_vhdr_buf); |
529 | kfree(sbi->s_backup_vhdr); | 534 | kfree(sbi->s_backup_vhdr_buf); |
530 | out_unload_nls: | 535 | out_unload_nls: |
531 | unload_nls(sbi->nls); | 536 | unload_nls(sbi->nls); |
532 | unload_nls(nls); | 537 | unload_nls(nls); |
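
The open-coded check replaces generic_check_addressable() with the two limits that matter here: the last allocation block must still fit in sector_t after conversion to 512-byte sectors (hence the shift by alloc_blksz_shift - 9), and the last page-cache index must fit in pgoff_t. A worked example for 4 KiB allocation blocks on a kernel with a 32-bit sector_t:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            unsigned alloc_blksz_shift = 12;      /* 4 KiB allocation blocks */
            uint32_t max_sector = ~(uint32_t)0;   /* 32-bit sector_t (no large block devices) */

            /* largest addressable last block: sectors are 512 bytes (shift 9) */
            uint64_t max_last_block = (uint64_t)max_sector >> (alloc_blksz_shift - 9);

            printf("max blocks: %llu (~%llu GiB)\n",
                   (unsigned long long)max_last_block + 1,
                   ((unsigned long long)max_last_block + 1) << alloc_blksz_shift >> 30);
            /* prints 536870912 blocks, ~2048 GiB: beyond that, mount must fail */
            return 0;
    }
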
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index 10e515a0d452..7daf4b852d1c 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c | |||
@@ -272,9 +272,9 @@ reread: | |||
272 | return 0; | 272 | return 0; |
273 | 273 | ||
274 | out_free_backup_vhdr: | 274 | out_free_backup_vhdr: |
275 | kfree(sbi->s_backup_vhdr); | 275 | kfree(sbi->s_backup_vhdr_buf); |
276 | out_free_vhdr: | 276 | out_free_vhdr: |
277 | kfree(sbi->s_vhdr); | 277 | kfree(sbi->s_vhdr_buf); |
278 | out: | 278 | out: |
279 | return error; | 279 | return error; |
280 | } | 280 | } |
diff --git a/fs/namei.c b/fs/namei.c index b52bc685465f..0b3138de2a3b 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -721,12 +721,6 @@ static int follow_automount(struct path *path, unsigned flags, | |||
721 | if (!path->dentry->d_op || !path->dentry->d_op->d_automount) | 721 | if (!path->dentry->d_op || !path->dentry->d_op->d_automount) |
722 | return -EREMOTE; | 722 | return -EREMOTE; |
723 | 723 | ||
724 | /* We don't want to mount if someone supplied AT_NO_AUTOMOUNT | ||
725 | * and this is the terminal part of the path. | ||
726 | */ | ||
727 | if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_PARENT)) | ||
728 | return -EISDIR; /* we actually want to stop here */ | ||
729 | |||
730 | /* We don't want to mount if someone's just doing a stat - | 724 | /* We don't want to mount if someone's just doing a stat - |
731 | * unless they're stat'ing a directory and appended a '/' to | 725 | * unless they're stat'ing a directory and appended a '/' to |
732 | * the name. | 726 | * the name. |
@@ -739,7 +733,7 @@ static int follow_automount(struct path *path, unsigned flags, | |||
739 | * of the daemon to instantiate them before they can be used. | 733 | * of the daemon to instantiate them before they can be used. |
740 | */ | 734 | */ |
741 | if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | | 735 | if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | |
742 | LOOKUP_OPEN | LOOKUP_CREATE)) && | 736 | LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) && |
743 | path->dentry->d_inode) | 737 | path->dentry->d_inode) |
744 | return -EISDIR; | 738 | return -EISDIR; |
745 | 739 | ||
@@ -2616,6 +2610,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2616 | if (!dir->i_op->rmdir) | 2610 | if (!dir->i_op->rmdir) |
2617 | return -EPERM; | 2611 | return -EPERM; |
2618 | 2612 | ||
2613 | dget(dentry); | ||
2619 | mutex_lock(&dentry->d_inode->i_mutex); | 2614 | mutex_lock(&dentry->d_inode->i_mutex); |
2620 | 2615 | ||
2621 | error = -EBUSY; | 2616 | error = -EBUSY; |
@@ -2636,6 +2631,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2636 | 2631 | ||
2637 | out: | 2632 | out: |
2638 | mutex_unlock(&dentry->d_inode->i_mutex); | 2633 | mutex_unlock(&dentry->d_inode->i_mutex); |
2634 | dput(dentry); | ||
2639 | if (!error) | 2635 | if (!error) |
2640 | d_delete(dentry); | 2636 | d_delete(dentry); |
2641 | return error; | 2637 | return error; |
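
The added dget()/dput() pair pins the victim dentry across the locked section so that the final d_delete(), issued after i_mutex is dropped, operates on a dentry that is guaranteed to still exist (the caller holds its own reference as well). The shape of the change, with hypothetical stand-ins for the dcache primitives:

    /* hypothetical stand-ins for the dcache and inode-lock primitives */
    struct dentry { int refcount; };
    static void dget(struct dentry *d) { d->refcount++; }
    static void dput(struct dentry *d) { d->refcount--; }
    static void lock_victim(struct dentry *d) { (void)d; }
    static void unlock_victim(struct dentry *d) { (void)d; }
    static void d_delete(struct dentry *d) { (void)d; }

    static int sketch_rmdir(struct dentry *dentry)
    {
            int error = 0;

            dget(dentry);             /* extra pin for the locked section */
            lock_victim(dentry);      /* i_mutex on dentry->d_inode in the kernel */
            /* ... busy/empty checks and the fs ->rmdir() call ... */
            unlock_victim(dentry);
            dput(dentry);             /* caller still holds its own reference */
            if (!error)
                    d_delete(dentry);
            return error;
    }
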
@@ -3025,6 +3021,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, | |||
3025 | if (error) | 3021 | if (error) |
3026 | return error; | 3022 | return error; |
3027 | 3023 | ||
3024 | dget(new_dentry); | ||
3028 | if (target) | 3025 | if (target) |
3029 | mutex_lock(&target->i_mutex); | 3026 | mutex_lock(&target->i_mutex); |
3030 | 3027 | ||
@@ -3045,6 +3042,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, | |||
3045 | out: | 3042 | out: |
3046 | if (target) | 3043 | if (target) |
3047 | mutex_unlock(&target->i_mutex); | 3044 | mutex_unlock(&target->i_mutex); |
3045 | dput(new_dentry); | ||
3048 | if (!error) | 3046 | if (!error) |
3049 | if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) | 3047 | if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) |
3050 | d_move(old_dentry,new_dentry); | 3048 | d_move(old_dentry,new_dentry); |
diff --git a/fs/namespace.c b/fs/namespace.c index 22bfe8273c68..b4febb29d3bb 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -1757,7 +1757,7 @@ static int do_loopback(struct path *path, char *old_name, | |||
1757 | return err; | 1757 | return err; |
1758 | if (!old_name || !*old_name) | 1758 | if (!old_name || !*old_name) |
1759 | return -EINVAL; | 1759 | return -EINVAL; |
1760 | err = kern_path(old_name, LOOKUP_FOLLOW, &old_path); | 1760 | err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path); |
1761 | if (err) | 1761 | if (err) |
1762 | return err; | 1762 | return err; |
1763 | 1763 | ||
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 1ec1a85fa71c..3e93e9a1bee1 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h | |||
@@ -56,6 +56,9 @@ enum nfs4_session_state { | |||
56 | NFS4_SESSION_DRAINING, | 56 | NFS4_SESSION_DRAINING, |
57 | }; | 57 | }; |
58 | 58 | ||
59 | #define NFS4_RENEW_TIMEOUT 0x01 | ||
60 | #define NFS4_RENEW_DELEGATION_CB 0x02 | ||
61 | |||
59 | struct nfs4_minor_version_ops { | 62 | struct nfs4_minor_version_ops { |
60 | u32 minor_version; | 63 | u32 minor_version; |
61 | 64 | ||
@@ -225,7 +228,7 @@ struct nfs4_state_recovery_ops { | |||
225 | }; | 228 | }; |
226 | 229 | ||
227 | struct nfs4_state_maintenance_ops { | 230 | struct nfs4_state_maintenance_ops { |
228 | int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *); | 231 | int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *, unsigned); |
229 | struct rpc_cred * (*get_state_renewal_cred_locked)(struct nfs_client *); | 232 | struct rpc_cred * (*get_state_renewal_cred_locked)(struct nfs_client *); |
230 | int (*renew_lease)(struct nfs_client *, struct rpc_cred *); | 233 | int (*renew_lease)(struct nfs_client *, struct rpc_cred *); |
231 | }; | 234 | }; |
@@ -237,8 +240,6 @@ extern const struct inode_operations nfs4_dir_inode_operations; | |||
237 | extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); | 240 | extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); |
238 | extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); | 241 | extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); |
239 | extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); | 242 | extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); |
240 | extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); | ||
241 | extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); | ||
242 | extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); | 243 | extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); |
243 | extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); | 244 | extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); |
244 | extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc); | 245 | extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc); |
@@ -349,6 +350,7 @@ extern void nfs4_close_sync(struct nfs4_state *, fmode_t); | |||
349 | extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); | 350 | extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); |
350 | extern void nfs4_schedule_lease_recovery(struct nfs_client *); | 351 | extern void nfs4_schedule_lease_recovery(struct nfs_client *); |
351 | extern void nfs4_schedule_state_manager(struct nfs_client *); | 352 | extern void nfs4_schedule_state_manager(struct nfs_client *); |
353 | extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp); | ||
352 | extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); | 354 | extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); |
353 | extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); | 355 | extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); |
354 | extern void nfs41_handle_recall_slot(struct nfs_client *clp); | 356 | extern void nfs41_handle_recall_slot(struct nfs_client *clp); |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8c77039e7a81..4700fae1ada0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -3374,9 +3374,13 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata) | |||
3374 | 3374 | ||
3375 | if (task->tk_status < 0) { | 3375 | if (task->tk_status < 0) { |
3376 | /* Unless we're shutting down, schedule state recovery! */ | 3376 | /* Unless we're shutting down, schedule state recovery! */ |
3377 | if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0) | 3377 | if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) == 0) |
3378 | return; | ||
3379 | if (task->tk_status != NFS4ERR_CB_PATH_DOWN) { | ||
3378 | nfs4_schedule_lease_recovery(clp); | 3380 | nfs4_schedule_lease_recovery(clp); |
3379 | return; | 3381 | return; |
3382 | } | ||
3383 | nfs4_schedule_path_down_recovery(clp); | ||
3380 | } | 3384 | } |
3381 | do_renew_lease(clp, timestamp); | 3385 | do_renew_lease(clp, timestamp); |
3382 | } | 3386 | } |
@@ -3386,7 +3390,7 @@ static const struct rpc_call_ops nfs4_renew_ops = { | |||
3386 | .rpc_release = nfs4_renew_release, | 3390 | .rpc_release = nfs4_renew_release, |
3387 | }; | 3391 | }; |
3388 | 3392 | ||
3389 | int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) | 3393 | static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags) |
3390 | { | 3394 | { |
3391 | struct rpc_message msg = { | 3395 | struct rpc_message msg = { |
3392 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], | 3396 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], |
@@ -3395,9 +3399,11 @@ int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) | |||
3395 | }; | 3399 | }; |
3396 | struct nfs4_renewdata *data; | 3400 | struct nfs4_renewdata *data; |
3397 | 3401 | ||
3402 | if (renew_flags == 0) | ||
3403 | return 0; | ||
3398 | if (!atomic_inc_not_zero(&clp->cl_count)) | 3404 | if (!atomic_inc_not_zero(&clp->cl_count)) |
3399 | return -EIO; | 3405 | return -EIO; |
3400 | data = kmalloc(sizeof(*data), GFP_KERNEL); | 3406 | data = kmalloc(sizeof(*data), GFP_NOFS); |
3401 | if (data == NULL) | 3407 | if (data == NULL) |
3402 | return -ENOMEM; | 3408 | return -ENOMEM; |
3403 | data->client = clp; | 3409 | data->client = clp; |
@@ -3406,7 +3412,7 @@ int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) | |||
3406 | &nfs4_renew_ops, data); | 3412 | &nfs4_renew_ops, data); |
3407 | } | 3413 | } |
3408 | 3414 | ||
3409 | int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) | 3415 | static int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) |
3410 | { | 3416 | { |
3411 | struct rpc_message msg = { | 3417 | struct rpc_message msg = { |
3412 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], | 3418 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], |
@@ -5504,11 +5510,13 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_ | |||
5504 | return rpc_run_task(&task_setup_data); | 5510 | return rpc_run_task(&task_setup_data); |
5505 | } | 5511 | } |
5506 | 5512 | ||
5507 | static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred) | 5513 | static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags) |
5508 | { | 5514 | { |
5509 | struct rpc_task *task; | 5515 | struct rpc_task *task; |
5510 | int ret = 0; | 5516 | int ret = 0; |
5511 | 5517 | ||
5518 | if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0) | ||
5519 | return 0; | ||
5512 | task = _nfs41_proc_sequence(clp, cred); | 5520 | task = _nfs41_proc_sequence(clp, cred); |
5513 | if (IS_ERR(task)) | 5521 | if (IS_ERR(task)) |
5514 | ret = PTR_ERR(task); | 5522 | ret = PTR_ERR(task); |
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index df8e7f3ca56d..dc484c0eae7f 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c | |||
@@ -60,6 +60,7 @@ nfs4_renew_state(struct work_struct *work) | |||
60 | struct rpc_cred *cred; | 60 | struct rpc_cred *cred; |
61 | long lease; | 61 | long lease; |
62 | unsigned long last, now; | 62 | unsigned long last, now; |
63 | unsigned renew_flags = 0; | ||
63 | 64 | ||
64 | ops = clp->cl_mvops->state_renewal_ops; | 65 | ops = clp->cl_mvops->state_renewal_ops; |
65 | dprintk("%s: start\n", __func__); | 66 | dprintk("%s: start\n", __func__); |
@@ -72,18 +73,23 @@ nfs4_renew_state(struct work_struct *work) | |||
72 | last = clp->cl_last_renewal; | 73 | last = clp->cl_last_renewal; |
73 | now = jiffies; | 74 | now = jiffies; |
74 | /* Are we close to a lease timeout? */ | 75 | /* Are we close to a lease timeout? */ |
75 | if (time_after(now, last + lease/3)) { | 76 | if (time_after(now, last + lease/3)) |
77 | renew_flags |= NFS4_RENEW_TIMEOUT; | ||
78 | if (nfs_delegations_present(clp)) | ||
79 | renew_flags |= NFS4_RENEW_DELEGATION_CB; | ||
80 | |||
81 | if (renew_flags != 0) { | ||
76 | cred = ops->get_state_renewal_cred_locked(clp); | 82 | cred = ops->get_state_renewal_cred_locked(clp); |
77 | spin_unlock(&clp->cl_lock); | 83 | spin_unlock(&clp->cl_lock); |
78 | if (cred == NULL) { | 84 | if (cred == NULL) { |
79 | if (!nfs_delegations_present(clp)) { | 85 | if (!(renew_flags & NFS4_RENEW_DELEGATION_CB)) { |
80 | set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); | 86 | set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); |
81 | goto out; | 87 | goto out; |
82 | } | 88 | } |
83 | nfs_expire_all_delegations(clp); | 89 | nfs_expire_all_delegations(clp); |
84 | } else { | 90 | } else { |
85 | /* Queue an asynchronous RENEW. */ | 91 | /* Queue an asynchronous RENEW. */ |
86 | ops->sched_state_renewal(clp, cred); | 92 | ops->sched_state_renewal(clp, cred, renew_flags); |
87 | put_rpccred(cred); | 93 | put_rpccred(cred); |
88 | goto out_exp; | 94 | goto out_exp; |
89 | } | 95 | } |
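
nfs4_renew_state() now derives renew_flags before deciding whether to renew: NFS4_RENEW_TIMEOUT once a third of the lease has elapsed, and NFS4_RENEW_DELEGATION_CB whenever delegations are held, so the callback path keeps being exercised even between lease timeouts. A compilable sketch of that decision, with seconds standing in for jiffies and the flag values assumed for illustration:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define NFS4_RENEW_TIMEOUT       0x01	/* assumed values */
#define NFS4_RENEW_DELEGATION_CB 0x02

static unsigned compute_renew_flags(time_t last_renewal, time_t now,
				    long lease_seconds, bool have_delegations)
{
	unsigned renew_flags = 0;

	/* Renew once a third of the lease has elapsed... */
	if (now > last_renewal + lease_seconds / 3)
		renew_flags |= NFS4_RENEW_TIMEOUT;
	/* ...and also ping the server while delegations are held, so a
	 * broken callback path is noticed before the server revokes them. */
	if (have_delegations)
		renew_flags |= NFS4_RENEW_DELEGATION_CB;
	return renew_flags;
}

int main(void)
{
	time_t now = time(NULL);

	printf("flags=%#x\n", compute_renew_flags(now - 40, now, 90, true));
	printf("flags=%#x\n", compute_renew_flags(now - 10, now, 90, false));
	return 0;
}
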
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 72ab97ef3d61..39914be40b03 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -1038,6 +1038,12 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp) | |||
1038 | nfs4_schedule_state_manager(clp); | 1038 | nfs4_schedule_state_manager(clp); |
1039 | } | 1039 | } |
1040 | 1040 | ||
1041 | void nfs4_schedule_path_down_recovery(struct nfs_client *clp) | ||
1042 | { | ||
1043 | nfs_handle_cb_pathdown(clp); | ||
1044 | nfs4_schedule_state_manager(clp); | ||
1045 | } | ||
1046 | |||
1041 | static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) | 1047 | static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) |
1042 | { | 1048 | { |
1043 | 1049 | ||
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index b961ceac66b4..5b19b6aabe18 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -2035,9 +2035,6 @@ static inline void nfs_initialise_sb(struct super_block *sb) | |||
2035 | sb->s_blocksize = nfs_block_bits(server->wsize, | 2035 | sb->s_blocksize = nfs_block_bits(server->wsize, |
2036 | &sb->s_blocksize_bits); | 2036 | &sb->s_blocksize_bits); |
2037 | 2037 | ||
2038 | if (server->flags & NFS_MOUNT_NOAC) | ||
2039 | sb->s_flags |= MS_SYNCHRONOUS; | ||
2040 | |||
2041 | sb->s_bdi = &server->backing_dev_info; | 2038 | sb->s_bdi = &server->backing_dev_info; |
2042 | 2039 | ||
2043 | nfs_super_set_maxbytes(sb, server->maxfilesize); | 2040 | nfs_super_set_maxbytes(sb, server->maxfilesize); |
@@ -2249,6 +2246,10 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, | |||
2249 | if (server->flags & NFS_MOUNT_UNSHARED) | 2246 | if (server->flags & NFS_MOUNT_UNSHARED) |
2250 | compare_super = NULL; | 2247 | compare_super = NULL; |
2251 | 2248 | ||
2249 | /* -o noac implies -o sync */ | ||
2250 | if (server->flags & NFS_MOUNT_NOAC) | ||
2251 | sb_mntdata.mntflags |= MS_SYNCHRONOUS; | ||
2252 | |||
2252 | /* Get a superblock - note that we may end up sharing one that already exists */ | 2253 | /* Get a superblock - note that we may end up sharing one that already exists */ |
2253 | s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata); | 2254 | s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata); |
2254 | if (IS_ERR(s)) { | 2255 | if (IS_ERR(s)) { |
@@ -2361,6 +2362,10 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags, | |||
2361 | if (server->flags & NFS_MOUNT_UNSHARED) | 2362 | if (server->flags & NFS_MOUNT_UNSHARED) |
2362 | compare_super = NULL; | 2363 | compare_super = NULL; |
2363 | 2364 | ||
2365 | /* -o noac implies -o sync */ | ||
2366 | if (server->flags & NFS_MOUNT_NOAC) | ||
2367 | sb_mntdata.mntflags |= MS_SYNCHRONOUS; | ||
2368 | |||
2364 | /* Get a superblock - note that we may end up sharing one that already exists */ | 2369 | /* Get a superblock - note that we may end up sharing one that already exists */ |
2365 | s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata); | 2370 | s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata); |
2366 | if (IS_ERR(s)) { | 2371 | if (IS_ERR(s)) { |
@@ -2628,6 +2633,10 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, | |||
2628 | if (server->flags & NFS4_MOUNT_UNSHARED) | 2633 | if (server->flags & NFS4_MOUNT_UNSHARED) |
2629 | compare_super = NULL; | 2634 | compare_super = NULL; |
2630 | 2635 | ||
2636 | /* -o noac implies -o sync */ | ||
2637 | if (server->flags & NFS_MOUNT_NOAC) | ||
2638 | sb_mntdata.mntflags |= MS_SYNCHRONOUS; | ||
2639 | |||
2631 | /* Get a superblock - note that we may end up sharing one that already exists */ | 2640 | /* Get a superblock - note that we may end up sharing one that already exists */ |
2632 | s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); | 2641 | s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); |
2633 | if (IS_ERR(s)) { | 2642 | if (IS_ERR(s)) { |
@@ -2789,7 +2798,7 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt, | |||
2789 | goto out_put_mnt_ns; | 2798 | goto out_put_mnt_ns; |
2790 | 2799 | ||
2791 | ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt, | 2800 | ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt, |
2792 | export_path, LOOKUP_FOLLOW, &path); | 2801 | export_path, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path); |
2793 | 2802 | ||
2794 | nfs_referral_loop_unprotect(); | 2803 | nfs_referral_loop_unprotect(); |
2795 | put_mnt_ns(ns_private); | 2804 | put_mnt_ns(ns_private); |
@@ -2916,6 +2925,10 @@ nfs4_xdev_mount(struct file_system_type *fs_type, int flags, | |||
2916 | if (server->flags & NFS4_MOUNT_UNSHARED) | 2925 | if (server->flags & NFS4_MOUNT_UNSHARED) |
2917 | compare_super = NULL; | 2926 | compare_super = NULL; |
2918 | 2927 | ||
2928 | /* -o noac implies -o sync */ | ||
2929 | if (server->flags & NFS_MOUNT_NOAC) | ||
2930 | sb_mntdata.mntflags |= MS_SYNCHRONOUS; | ||
2931 | |||
2919 | /* Get a superblock - note that we may end up sharing one that already exists */ | 2932 | /* Get a superblock - note that we may end up sharing one that already exists */ |
2920 | s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); | 2933 | s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); |
2921 | if (IS_ERR(s)) { | 2934 | if (IS_ERR(s)) { |
@@ -3003,6 +3016,10 @@ nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, | |||
3003 | if (server->flags & NFS4_MOUNT_UNSHARED) | 3016 | if (server->flags & NFS4_MOUNT_UNSHARED) |
3004 | compare_super = NULL; | 3017 | compare_super = NULL; |
3005 | 3018 | ||
3019 | /* -o noac implies -o sync */ | ||
3020 | if (server->flags & NFS_MOUNT_NOAC) | ||
3021 | sb_mntdata.mntflags |= MS_SYNCHRONOUS; | ||
3022 | |||
3006 | /* Get a superblock - note that we may end up sharing one that already exists */ | 3023 | /* Get a superblock - note that we may end up sharing one that already exists */ |
3007 | s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); | 3024 | s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); |
3008 | if (IS_ERR(s)) { | 3025 | if (IS_ERR(s)) { |
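
All four mount paths above now fold -o noac into MS_SYNCHRONOUS before calling sget() rather than afterwards in nfs_initialise_sb(). The point is that the mount flags belong to the data sget() matches superblocks on; set after the fact, the sync flag could never influence whether a noac mount shares a superblock with a non-noac one. A toy illustration of that matching, with invented names standing in for sget() and its compare callback:

#include <stdbool.h>
#include <stdio.h>

#define MS_SYNCHRONOUS 0x10	/* value as in the Linux mount headers */

struct sb { unsigned mntflags; };

/* stand-in for the compare_super test sget() applies */
static bool super_matches(const struct sb *sb, unsigned wanted_flags)
{
	return sb->mntflags == wanted_flags;
}

int main(void)
{
	struct sb existing = { .mntflags = 0 };	/* mounted without -o noac */
	unsigned wanted = 0;
	bool noac = true;

	/* -o noac implies -o sync *before* we search for a superblock... */
	if (noac)
		wanted |= MS_SYNCHRONOUS;

	/* ...so a noac mount no longer silently shares a non-sync sb. */
	printf("share existing sb? %s\n",
	       super_matches(&existing, wanted) ? "yes" : "no");
	return 0;
}
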
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b39b37f80913..c9bd2a6b7d4b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -958,7 +958,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head | |||
958 | if (!data) | 958 | if (!data) |
959 | goto out_bad; | 959 | goto out_bad; |
960 | data->pagevec[0] = page; | 960 | data->pagevec[0] = page; |
961 | nfs_write_rpcsetup(req, data, wsize, offset, desc->pg_ioflags); | 961 | nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags); |
962 | list_add(&data->list, res); | 962 | list_add(&data->list, res); |
963 | requests++; | 963 | requests++; |
964 | nbytes -= len; | 964 | nbytes -= len; |
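
The one-word nfs_flush_multi() fix passes the per-request length instead of the full wsize, so the final, shorter RPC of a split page is no longer set up with an oversized count. The loop shape, in a standalone sketch with a stub in place of nfs_write_rpcsetup():

#include <stdio.h>

static void rpcsetup(unsigned offset, unsigned count)
{
	printf("RPC: offset=%u count=%u\n", offset, count);
}

int main(void)
{
	unsigned nbytes = 10000;	/* bytes left to write */
	unsigned wsize = 4096;		/* server's max write size */
	unsigned offset = 0;

	do {
		unsigned len = nbytes < wsize ? nbytes : wsize;

		/* The bug passed wsize here, overstating the last RPC
		 * (4096 instead of 1808 in this example). */
		rpcsetup(offset, len);
		offset += len;
		nbytes -= len;
	} while (nbytes != 0);
	return 0;
}
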
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 25b6a887adb9..5afaa58a8630 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -877,30 +877,54 @@ struct numa_maps_private { | |||
877 | struct numa_maps md; | 877 | struct numa_maps md; |
878 | }; | 878 | }; |
879 | 879 | ||
880 | static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty) | 880 | static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty, |
881 | unsigned long nr_pages) | ||
881 | { | 882 | { |
882 | int count = page_mapcount(page); | 883 | int count = page_mapcount(page); |
883 | 884 | ||
884 | md->pages++; | 885 | md->pages += nr_pages; |
885 | if (pte_dirty || PageDirty(page)) | 886 | if (pte_dirty || PageDirty(page)) |
886 | md->dirty++; | 887 | md->dirty += nr_pages; |
887 | 888 | ||
888 | if (PageSwapCache(page)) | 889 | if (PageSwapCache(page)) |
889 | md->swapcache++; | 890 | md->swapcache += nr_pages; |
890 | 891 | ||
891 | if (PageActive(page) || PageUnevictable(page)) | 892 | if (PageActive(page) || PageUnevictable(page)) |
892 | md->active++; | 893 | md->active += nr_pages; |
893 | 894 | ||
894 | if (PageWriteback(page)) | 895 | if (PageWriteback(page)) |
895 | md->writeback++; | 896 | md->writeback += nr_pages; |
896 | 897 | ||
897 | if (PageAnon(page)) | 898 | if (PageAnon(page)) |
898 | md->anon++; | 899 | md->anon += nr_pages; |
899 | 900 | ||
900 | if (count > md->mapcount_max) | 901 | if (count > md->mapcount_max) |
901 | md->mapcount_max = count; | 902 | md->mapcount_max = count; |
902 | 903 | ||
903 | md->node[page_to_nid(page)]++; | 904 | md->node[page_to_nid(page)] += nr_pages; |
905 | } | ||
906 | |||
907 | static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma, | ||
908 | unsigned long addr) | ||
909 | { | ||
910 | struct page *page; | ||
911 | int nid; | ||
912 | |||
913 | if (!pte_present(pte)) | ||
914 | return NULL; | ||
915 | |||
916 | page = vm_normal_page(vma, addr, pte); | ||
917 | if (!page) | ||
918 | return NULL; | ||
919 | |||
920 | if (PageReserved(page)) | ||
921 | return NULL; | ||
922 | |||
923 | nid = page_to_nid(page); | ||
924 | if (!node_isset(nid, node_states[N_HIGH_MEMORY])) | ||
925 | return NULL; | ||
926 | |||
927 | return page; | ||
904 | } | 928 | } |
905 | 929 | ||
906 | static int gather_pte_stats(pmd_t *pmd, unsigned long addr, | 930 | static int gather_pte_stats(pmd_t *pmd, unsigned long addr, |
@@ -912,26 +936,32 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, | |||
912 | pte_t *pte; | 936 | pte_t *pte; |
913 | 937 | ||
914 | md = walk->private; | 938 | md = walk->private; |
915 | orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); | 939 | spin_lock(&walk->mm->page_table_lock); |
916 | do { | 940 | if (pmd_trans_huge(*pmd)) { |
917 | struct page *page; | 941 | if (pmd_trans_splitting(*pmd)) { |
918 | int nid; | 942 | spin_unlock(&walk->mm->page_table_lock); |
943 | wait_split_huge_page(md->vma->anon_vma, pmd); | ||
944 | } else { | ||
945 | pte_t huge_pte = *(pte_t *)pmd; | ||
946 | struct page *page; | ||
919 | 947 | ||
920 | if (!pte_present(*pte)) | 948 | page = can_gather_numa_stats(huge_pte, md->vma, addr); |
921 | continue; | 949 | if (page) |
950 | gather_stats(page, md, pte_dirty(huge_pte), | ||
951 | HPAGE_PMD_SIZE/PAGE_SIZE); | ||
952 | spin_unlock(&walk->mm->page_table_lock); | ||
953 | return 0; | ||
954 | } | ||
955 | } else { | ||
956 | spin_unlock(&walk->mm->page_table_lock); | ||
957 | } | ||
922 | 958 | ||
923 | page = vm_normal_page(md->vma, addr, *pte); | 959 | orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); |
960 | do { | ||
961 | struct page *page = can_gather_numa_stats(*pte, md->vma, addr); | ||
924 | if (!page) | 962 | if (!page) |
925 | continue; | 963 | continue; |
926 | 964 | gather_stats(page, md, pte_dirty(*pte), 1); | |
927 | if (PageReserved(page)) | ||
928 | continue; | ||
929 | |||
930 | nid = page_to_nid(page); | ||
931 | if (!node_isset(nid, node_states[N_HIGH_MEMORY])) | ||
932 | continue; | ||
933 | |||
934 | gather_stats(page, md, pte_dirty(*pte)); | ||
935 | 965 | ||
936 | } while (pte++, addr += PAGE_SIZE, addr != end); | 966 | } while (pte++, addr += PAGE_SIZE, addr != end); |
937 | pte_unmap_unlock(orig_pte, ptl); | 967 | pte_unmap_unlock(orig_pte, ptl); |
@@ -952,7 +982,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, | |||
952 | return 0; | 982 | return 0; |
953 | 983 | ||
954 | md = walk->private; | 984 | md = walk->private; |
955 | gather_stats(page, md, pte_dirty(*pte)); | 985 | gather_stats(page, md, pte_dirty(*pte), 1); |
956 | return 0; | 986 | return 0; |
957 | } | 987 | } |
958 | 988 | ||
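
The task_mmu.c rework teaches /proc/<pid>/numa_maps about transparent huge pages: the pte sanity checks move into can_gather_numa_stats(), gather_stats() takes an nr_pages count, and a huge pmd is accounted as HPAGE_PMD_SIZE/PAGE_SIZE base pages in a single call (after taking page_table_lock and waiting out a splitting pmd). The counting idea in miniature, with the THP ratio assumed to be 512 as on x86-64 with 4 KiB base pages:

#include <stdbool.h>
#include <stdio.h>

#define HPAGE_PMD_NR 512	/* 2 MiB huge page / 4 KiB base page */

struct numa_md {
	unsigned long pages;
	unsigned long dirty;
	unsigned long anon;
};

static void gather_stats(struct numa_md *md, bool dirty, bool anon,
			 unsigned long nr_pages)
{
	/* One call accounts nr_pages base pages at once. */
	md->pages += nr_pages;
	if (dirty)
		md->dirty += nr_pages;
	if (anon)
		md->anon += nr_pages;
}

int main(void)
{
	struct numa_md md = { 0 };

	gather_stats(&md, true, true, HPAGE_PMD_NR);	/* one THP */
	gather_stats(&md, false, true, 1);		/* one base page */
	printf("pages=%lu dirty=%lu anon=%lu\n", md.pages, md.dirty, md.anon);
	return 0;
}
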
diff --git a/fs/quota/quota.c b/fs/quota/quota.c index b34bdb25490c..10b6be3ca280 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c | |||
@@ -355,7 +355,7 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special, | |||
355 | * resolution (think about autofs) and thus deadlocks could arise. | 355 | * resolution (think about autofs) and thus deadlocks could arise. |
356 | */ | 356 | */ |
357 | if (cmds == Q_QUOTAON) { | 357 | if (cmds == Q_QUOTAON) { |
358 | ret = user_path_at(AT_FDCWD, addr, LOOKUP_FOLLOW, &path); | 358 | ret = user_path_at(AT_FDCWD, addr, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path); |
359 | if (ret) | 359 | if (ret) |
360 | pathp = ERR_PTR(ret); | 360 | pathp = ERR_PTR(ret); |
361 | else | 361 | else |
diff --git a/fs/stat.c b/fs/stat.c --- a/fs/stat.c +++ b/fs/stat.c | |||
@@ -81,8 +81,6 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, | |||
81 | 81 | ||
82 | if (!(flag & AT_SYMLINK_NOFOLLOW)) | 82 | if (!(flag & AT_SYMLINK_NOFOLLOW)) |
83 | lookup_flags |= LOOKUP_FOLLOW; | 83 | lookup_flags |= LOOKUP_FOLLOW; |
84 | if (flag & AT_NO_AUTOMOUNT) | ||
85 | lookup_flags |= LOOKUP_NO_AUTOMOUNT; | ||
86 | if (flag & AT_EMPTY_PATH) | 84 | if (flag & AT_EMPTY_PATH) |
87 | lookup_flags |= LOOKUP_EMPTY; | 85 | lookup_flags |= LOOKUP_EMPTY; |
88 | 86 | ||
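
Both hunks follow the VFS automount rework: following an automount point is now opt-in via LOOKUP_AUTOMOUNT, so quotactl's Q_QUOTAON lookup requests it explicitly, and vfs_fstatat() drops the old AT_NO_AUTOMOUNT translation because a plain stat() no longer triggers automounts by default. A sketch of the resulting flag translation; the constants match the uapi/VFS headers of this era, but the helper itself is a simplified stand-in for vfs_fstatat():

#include <stdio.h>

#define AT_SYMLINK_NOFOLLOW	0x100
#define AT_EMPTY_PATH		0x1000

#define LOOKUP_FOLLOW		0x0001
#define LOOKUP_AUTOMOUNT	0x0004
#define LOOKUP_EMPTY		0x4000

static unsigned build_lookup_flags(int atflags, int want_mounted_on)
{
	unsigned lf = 0;

	if (!(atflags & AT_SYMLINK_NOFOLLOW))
		lf |= LOOKUP_FOLLOW;
	if (atflags & AT_EMPTY_PATH)
		lf |= LOOKUP_EMPTY;
	/* Automounting is no longer a default to be opted out of; callers
	 * that must land on the mounted filesystem ask for it. */
	if (want_mounted_on)
		lf |= LOOKUP_AUTOMOUNT;
	return lf;
}

int main(void)
{
	printf("stat():    %#x\n", build_lookup_flags(0, 0));
	printf("quotaon(): %#x\n", build_lookup_flags(0, 1));
	return 0;
}
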
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 63e971e2b837..8c37dde4c521 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
@@ -1300,6 +1300,7 @@ xfs_end_io_direct_write( | |||
1300 | bool is_async) | 1300 | bool is_async) |
1301 | { | 1301 | { |
1302 | struct xfs_ioend *ioend = iocb->private; | 1302 | struct xfs_ioend *ioend = iocb->private; |
1303 | struct inode *inode = ioend->io_inode; | ||
1303 | 1304 | ||
1304 | /* | 1305 | /* |
1305 | * blockdev_direct_IO can return an error even after the I/O | 1306 | * blockdev_direct_IO can return an error even after the I/O |
@@ -1331,7 +1332,7 @@ xfs_end_io_direct_write( | |||
1331 | } | 1332 | } |
1332 | 1333 | ||
1333 | /* XXX: probably should move into the real I/O completion handler */ | 1334 | /* XXX: probably should move into the real I/O completion handler */ |
1334 | inode_dio_done(ioend->io_inode); | 1335 | inode_dio_done(inode); |
1335 | } | 1336 | } |
1336 | 1337 | ||
1337 | STATIC ssize_t | 1338 | STATIC ssize_t |
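
The xfs_end_io_direct_write() change caches ioend->io_inode in a local before the completion path runs, because completion may drop the last reference and free the ioend, leaving the later inode_dio_done() call dereferencing freed memory. The general pattern, with userspace stand-ins throughout:

#include <stdio.h>
#include <stdlib.h>

struct inode { int i_num; };
struct ioend { struct inode *io_inode; };

static void complete_ioend(struct ioend *ioend)
{
	free(ioend);	/* completion may be the last reference */
}

static void dio_done(struct inode *inode)
{
	printf("direct I/O done on inode %d\n", inode->i_num);
}

static void end_io(struct ioend *ioend)
{
	/* Take the inode pointer first... */
	struct inode *inode = ioend->io_inode;

	complete_ioend(ioend);
	/* ...because ioend->io_inode would be a use-after-free here. */
	dio_done(inode);
}

int main(void)
{
	static struct inode ino = { .i_num = 42 };
	struct ioend *ioend = malloc(sizeof(*ioend));

	if (!ioend)
		return 1;
	ioend->io_inode = &ino;
	end_io(ioend);
	return 0;
}
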
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index cac2ecfa6746..ef43fce519a1 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -629,7 +629,7 @@ xfs_buf_item_push( | |||
629 | * the xfsbufd to get this buffer written. We have to unlock the buffer | 629 | * the xfsbufd to get this buffer written. We have to unlock the buffer |
630 | * to allow the xfsbufd to write it, too. | 630 | * to allow the xfsbufd to write it, too. |
631 | */ | 631 | */ |
632 | STATIC void | 632 | STATIC bool |
633 | xfs_buf_item_pushbuf( | 633 | xfs_buf_item_pushbuf( |
634 | struct xfs_log_item *lip) | 634 | struct xfs_log_item *lip) |
635 | { | 635 | { |
@@ -643,6 +643,7 @@ xfs_buf_item_pushbuf( | |||
643 | 643 | ||
644 | xfs_buf_delwri_promote(bp); | 644 | xfs_buf_delwri_promote(bp); |
645 | xfs_buf_relse(bp); | 645 | xfs_buf_relse(bp); |
646 | return true; | ||
646 | } | 647 | } |
647 | 648 | ||
648 | STATIC void | 649 | STATIC void |
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 9e0e2fa3f2c8..bb3f71d236d2 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c | |||
@@ -183,13 +183,14 @@ xfs_qm_dqunpin_wait( | |||
183 | * search the buffer cache can be a time consuming thing, and AIL lock is a | 183 | * search the buffer cache can be a time consuming thing, and AIL lock is a |
184 | * spinlock. | 184 | * spinlock. |
185 | */ | 185 | */ |
186 | STATIC void | 186 | STATIC bool |
187 | xfs_qm_dquot_logitem_pushbuf( | 187 | xfs_qm_dquot_logitem_pushbuf( |
188 | struct xfs_log_item *lip) | 188 | struct xfs_log_item *lip) |
189 | { | 189 | { |
190 | struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); | 190 | struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); |
191 | struct xfs_dquot *dqp = qlip->qli_dquot; | 191 | struct xfs_dquot *dqp = qlip->qli_dquot; |
192 | struct xfs_buf *bp; | 192 | struct xfs_buf *bp; |
193 | bool ret = true; | ||
193 | 194 | ||
194 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); | 195 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); |
195 | 196 | ||
@@ -201,17 +202,20 @@ xfs_qm_dquot_logitem_pushbuf( | |||
201 | if (completion_done(&dqp->q_flush) || | 202 | if (completion_done(&dqp->q_flush) || |
202 | !(lip->li_flags & XFS_LI_IN_AIL)) { | 203 | !(lip->li_flags & XFS_LI_IN_AIL)) { |
203 | xfs_dqunlock(dqp); | 204 | xfs_dqunlock(dqp); |
204 | return; | 205 | return true; |
205 | } | 206 | } |
206 | 207 | ||
207 | bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno, | 208 | bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno, |
208 | dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); | 209 | dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); |
209 | xfs_dqunlock(dqp); | 210 | xfs_dqunlock(dqp); |
210 | if (!bp) | 211 | if (!bp) |
211 | return; | 212 | return true; |
212 | if (XFS_BUF_ISDELAYWRITE(bp)) | 213 | if (XFS_BUF_ISDELAYWRITE(bp)) |
213 | xfs_buf_delwri_promote(bp); | 214 | xfs_buf_delwri_promote(bp); |
215 | if (xfs_buf_ispinned(bp)) | ||
216 | ret = false; | ||
214 | xfs_buf_relse(bp); | 217 | xfs_buf_relse(bp); |
218 | return ret; | ||
215 | } | 219 | } |
216 | 220 | ||
217 | /* | 221 | /* |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 588406dc6a35..836ad80d4f2b 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -708,13 +708,14 @@ xfs_inode_item_committed( | |||
708 | * marked delayed write. If that's the case, we'll promote it and that will | 708 | * marked delayed write. If that's the case, we'll promote it and that will |
709 | * allow the caller to write the buffer by triggering the xfsbufd to run. | 709 | * allow the caller to write the buffer by triggering the xfsbufd to run. |
710 | */ | 710 | */ |
711 | STATIC void | 711 | STATIC bool |
712 | xfs_inode_item_pushbuf( | 712 | xfs_inode_item_pushbuf( |
713 | struct xfs_log_item *lip) | 713 | struct xfs_log_item *lip) |
714 | { | 714 | { |
715 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); | 715 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); |
716 | struct xfs_inode *ip = iip->ili_inode; | 716 | struct xfs_inode *ip = iip->ili_inode; |
717 | struct xfs_buf *bp; | 717 | struct xfs_buf *bp; |
718 | bool ret = true; | ||
718 | 719 | ||
719 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); | 720 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); |
720 | 721 | ||
@@ -725,7 +726,7 @@ xfs_inode_item_pushbuf( | |||
725 | if (completion_done(&ip->i_flush) || | 726 | if (completion_done(&ip->i_flush) || |
726 | !(lip->li_flags & XFS_LI_IN_AIL)) { | 727 | !(lip->li_flags & XFS_LI_IN_AIL)) { |
727 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 728 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
728 | return; | 729 | return true; |
729 | } | 730 | } |
730 | 731 | ||
731 | bp = xfs_incore(ip->i_mount->m_ddev_targp, iip->ili_format.ilf_blkno, | 732 | bp = xfs_incore(ip->i_mount->m_ddev_targp, iip->ili_format.ilf_blkno, |
@@ -733,10 +734,13 @@ xfs_inode_item_pushbuf( | |||
733 | 734 | ||
734 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 735 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
735 | if (!bp) | 736 | if (!bp) |
736 | return; | 737 | return true; |
737 | if (XFS_BUF_ISDELAYWRITE(bp)) | 738 | if (XFS_BUF_ISDELAYWRITE(bp)) |
738 | xfs_buf_delwri_promote(bp); | 739 | xfs_buf_delwri_promote(bp); |
740 | if (xfs_buf_ispinned(bp)) | ||
741 | ret = false; | ||
739 | xfs_buf_relse(bp); | 742 | xfs_buf_relse(bp); |
743 | return ret; | ||
740 | } | 744 | } |
741 | 745 | ||
742 | /* | 746 | /* |
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 1e8a45e74c3e..828662f70d64 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h | |||
@@ -68,6 +68,8 @@ | |||
68 | #include <linux/ctype.h> | 68 | #include <linux/ctype.h> |
69 | #include <linux/writeback.h> | 69 | #include <linux/writeback.h> |
70 | #include <linux/capability.h> | 70 | #include <linux/capability.h> |
71 | #include <linux/kthread.h> | ||
72 | #include <linux/freezer.h> | ||
71 | #include <linux/list_sort.h> | 73 | #include <linux/list_sort.h> |
72 | 74 | ||
73 | #include <asm/page.h> | 75 | #include <asm/page.h> |
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 2366c54cc4fa..5cf06b85fd9d 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -1652,24 +1652,13 @@ xfs_init_workqueues(void) | |||
1652 | */ | 1652 | */ |
1653 | xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8); | 1653 | xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8); |
1654 | if (!xfs_syncd_wq) | 1654 | if (!xfs_syncd_wq) |
1655 | goto out; | 1655 | return -ENOMEM; |
1656 | |||
1657 | xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8); | ||
1658 | if (!xfs_ail_wq) | ||
1659 | goto out_destroy_syncd; | ||
1660 | |||
1661 | return 0; | 1656 | return 0; |
1662 | |||
1663 | out_destroy_syncd: | ||
1664 | destroy_workqueue(xfs_syncd_wq); | ||
1665 | out: | ||
1666 | return -ENOMEM; | ||
1667 | } | 1657 | } |
1668 | 1658 | ||
1669 | STATIC void | 1659 | STATIC void |
1670 | xfs_destroy_workqueues(void) | 1660 | xfs_destroy_workqueues(void) |
1671 | { | 1661 | { |
1672 | destroy_workqueue(xfs_ail_wq); | ||
1673 | destroy_workqueue(xfs_syncd_wq); | 1662 | destroy_workqueue(xfs_syncd_wq); |
1674 | } | 1663 | } |
1675 | 1664 | ||
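
With xfs_ail_wq gone, xfs_init_workqueues() allocates a single workqueue and the goto unwind ladder collapses into direct returns. The idiom in miniature, using generic names: two resources need the ladder, one does not.

#include <stdlib.h>

static int init_two(void **a, void **b)
{
	*a = malloc(16);
	if (!*a)
		goto out;
	*b = malloc(16);
	if (!*b)
		goto out_free_a;
	return 0;

out_free_a:
	free(*a);
out:
	return -1;
}

static int init_one(void **a)
{
	*a = malloc(16);
	if (!*a)
		return -1;	/* nothing to unwind */
	return 0;
}

int main(void)
{
	void *a, *b;

	if (init_two(&a, &b) == 0) {
		free(b);
		free(a);
	}
	if (init_one(&a) == 0)
		free(a);
	return 0;
}
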
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 06a9759b6352..53597f4db9b5 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
@@ -350,7 +350,7 @@ typedef struct xfs_item_ops { | |||
350 | void (*iop_unlock)(xfs_log_item_t *); | 350 | void (*iop_unlock)(xfs_log_item_t *); |
351 | xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); | 351 | xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); |
352 | void (*iop_push)(xfs_log_item_t *); | 352 | void (*iop_push)(xfs_log_item_t *); |
353 | void (*iop_pushbuf)(xfs_log_item_t *); | 353 | bool (*iop_pushbuf)(xfs_log_item_t *); |
354 | void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); | 354 | void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); |
355 | } xfs_item_ops_t; | 355 | } xfs_item_ops_t; |
356 | 356 | ||
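
iop_pushbuf now returns a bool so the AIL can tell "buffer promoted for writeout" apart from "buffer pinned, the log must be forced"; the three implementations above return false when xfs_buf_ispinned() says the buffer cannot be written yet, and the AIL loop counts such items as stuck. A sketch of the signature change and the caller-side accounting, with illustrative stand-in types:

#include <stdbool.h>
#include <stdio.h>

struct log_item;

struct item_ops {
	bool (*iop_pushbuf)(struct log_item *);
};

struct log_item {
	const struct item_ops *ops;
	bool pinned;
};

static bool generic_pushbuf(struct log_item *lip)
{
	/* A pinned buffer cannot be written until the log is forced. */
	return !lip->pinned;
}

static const struct item_ops ops = { .iop_pushbuf = generic_pushbuf };

int main(void)
{
	struct log_item ok = { .ops = &ops, .pinned = false };
	struct log_item stuck_item = { .ops = &ops, .pinned = true };
	int stuck = 0, flush_log = 0;

	if (!ok.ops->iop_pushbuf(&ok))
		stuck++;
	if (!stuck_item.ops->iop_pushbuf(&stuck_item)) {
		stuck++;		/* mirrors the AIL accounting */
		flush_log = 1;
	}
	printf("stuck=%d flush_log=%d\n", stuck, flush_log);
	return 0;
}
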
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index c15aa29fa169..3a1e7ca54c2d 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c | |||
@@ -28,8 +28,6 @@ | |||
28 | #include "xfs_trans_priv.h" | 28 | #include "xfs_trans_priv.h" |
29 | #include "xfs_error.h" | 29 | #include "xfs_error.h" |
30 | 30 | ||
31 | struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ | ||
32 | |||
33 | #ifdef DEBUG | 31 | #ifdef DEBUG |
34 | /* | 32 | /* |
35 | * Check that the list is sorted as it should be. | 33 | * Check that the list is sorted as it should be. |
@@ -356,16 +354,10 @@ xfs_ail_delete( | |||
356 | xfs_trans_ail_cursor_clear(ailp, lip); | 354 | xfs_trans_ail_cursor_clear(ailp, lip); |
357 | } | 355 | } |
358 | 356 | ||
359 | /* | 357 | static long |
360 | * xfs_ail_worker does the work of pushing on the AIL. It will requeue itself | 358 | xfsaild_push( |
361 | * to run at a later time if there is more work to do to complete the push. | 359 | struct xfs_ail *ailp) |
362 | */ | ||
363 | STATIC void | ||
364 | xfs_ail_worker( | ||
365 | struct work_struct *work) | ||
366 | { | 360 | { |
367 | struct xfs_ail *ailp = container_of(to_delayed_work(work), | ||
368 | struct xfs_ail, xa_work); | ||
369 | xfs_mount_t *mp = ailp->xa_mount; | 361 | xfs_mount_t *mp = ailp->xa_mount; |
370 | struct xfs_ail_cursor cur; | 362 | struct xfs_ail_cursor cur; |
371 | xfs_log_item_t *lip; | 363 | xfs_log_item_t *lip; |
@@ -427,8 +419,13 @@ xfs_ail_worker( | |||
427 | 419 | ||
428 | case XFS_ITEM_PUSHBUF: | 420 | case XFS_ITEM_PUSHBUF: |
429 | XFS_STATS_INC(xs_push_ail_pushbuf); | 421 | XFS_STATS_INC(xs_push_ail_pushbuf); |
430 | IOP_PUSHBUF(lip); | 422 | |
431 | ailp->xa_last_pushed_lsn = lsn; | 423 | if (!IOP_PUSHBUF(lip)) { |
424 | stuck++; | ||
425 | flush_log = 1; | ||
426 | } else { | ||
427 | ailp->xa_last_pushed_lsn = lsn; | ||
428 | } | ||
432 | push_xfsbufd = 1; | 429 | push_xfsbufd = 1; |
433 | break; | 430 | break; |
434 | 431 | ||
@@ -440,7 +437,6 @@ xfs_ail_worker( | |||
440 | 437 | ||
441 | case XFS_ITEM_LOCKED: | 438 | case XFS_ITEM_LOCKED: |
442 | XFS_STATS_INC(xs_push_ail_locked); | 439 | XFS_STATS_INC(xs_push_ail_locked); |
443 | ailp->xa_last_pushed_lsn = lsn; | ||
444 | stuck++; | 440 | stuck++; |
445 | break; | 441 | break; |
446 | 442 | ||
@@ -501,20 +497,6 @@ out_done: | |||
501 | /* We're past our target or empty, so idle */ | 497 | /* We're past our target or empty, so idle */ |
502 | ailp->xa_last_pushed_lsn = 0; | 498 | ailp->xa_last_pushed_lsn = 0; |
503 | 499 | ||
504 | /* | ||
505 | * We clear the XFS_AIL_PUSHING_BIT first before checking | ||
506 | * whether the target has changed. If the target has changed, | ||
507 | * this pushes the requeue race directly onto the result of the | ||
508 | * atomic test/set bit, so we are guaranteed that either the | ||
509 | * the pusher that changed the target or ourselves will requeue | ||
510 | * the work (but not both). | ||
511 | */ | ||
512 | clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags); | ||
513 | smp_rmb(); | ||
514 | if (XFS_LSN_CMP(ailp->xa_target, target) == 0 || | ||
515 | test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags)) | ||
516 | return; | ||
517 | |||
518 | tout = 50; | 500 | tout = 50; |
519 | } else if (XFS_LSN_CMP(lsn, target) >= 0) { | 501 | } else if (XFS_LSN_CMP(lsn, target) >= 0) { |
520 | /* | 502 | /* |
@@ -537,9 +519,30 @@ out_done: | |||
537 | tout = 20; | 519 | tout = 20; |
538 | } | 520 | } |
539 | 521 | ||
540 | /* There is more to do, requeue us. */ | 522 | return tout; |
541 | queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, | 523 | } |
542 | msecs_to_jiffies(tout)); | 524 | |
525 | static int | ||
526 | xfsaild( | ||
527 | void *data) | ||
528 | { | ||
529 | struct xfs_ail *ailp = data; | ||
530 | long tout = 0; /* milliseconds */ | ||
531 | |||
532 | while (!kthread_should_stop()) { | ||
533 | if (tout && tout <= 20) | ||
534 | __set_current_state(TASK_KILLABLE); | ||
535 | else | ||
536 | __set_current_state(TASK_INTERRUPTIBLE); | ||
537 | schedule_timeout(tout ? | ||
538 | msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); | ||
539 | |||
540 | try_to_freeze(); | ||
541 | |||
542 | tout = xfsaild_push(ailp); | ||
543 | } | ||
544 | |||
545 | return 0; | ||
543 | } | 546 | } |
544 | 547 | ||
545 | /* | 548 | /* |
@@ -574,8 +577,9 @@ xfs_ail_push( | |||
574 | */ | 577 | */ |
575 | smp_wmb(); | 578 | smp_wmb(); |
576 | xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn); | 579 | xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn); |
577 | if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags)) | 580 | smp_wmb(); |
578 | queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0); | 581 | |
582 | wake_up_process(ailp->xa_task); | ||
579 | } | 583 | } |
580 | 584 | ||
581 | /* | 585 | /* |
@@ -813,9 +817,18 @@ xfs_trans_ail_init( | |||
813 | INIT_LIST_HEAD(&ailp->xa_ail); | 817 | INIT_LIST_HEAD(&ailp->xa_ail); |
814 | INIT_LIST_HEAD(&ailp->xa_cursors); | 818 | INIT_LIST_HEAD(&ailp->xa_cursors); |
815 | spin_lock_init(&ailp->xa_lock); | 819 | spin_lock_init(&ailp->xa_lock); |
816 | INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker); | 820 | |
821 | ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s", | ||
822 | ailp->xa_mount->m_fsname); | ||
823 | if (IS_ERR(ailp->xa_task)) | ||
824 | goto out_free_ailp; | ||
825 | |||
817 | mp->m_ail = ailp; | 826 | mp->m_ail = ailp; |
818 | return 0; | 827 | return 0; |
828 | |||
829 | out_free_ailp: | ||
830 | kmem_free(ailp); | ||
831 | return ENOMEM; | ||
819 | } | 832 | } |
820 | 833 | ||
821 | void | 834 | void |
@@ -824,6 +837,6 @@ xfs_trans_ail_destroy( | |||
824 | { | 837 | { |
825 | struct xfs_ail *ailp = mp->m_ail; | 838 | struct xfs_ail *ailp = mp->m_ail; |
826 | 839 | ||
827 | cancel_delayed_work_sync(&ailp->xa_work); | 840 | kthread_stop(ailp->xa_task); |
828 | kmem_free(ailp); | 841 | kmem_free(ailp); |
829 | } | 842 | } |
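
The AIL push engine moves from a self-requeueing delayed work item back to a dedicated xfsaild kthread: each xfsaild_push() returns a timeout in milliseconds, the thread sleeps that long (killably for short timeouts, interruptibly otherwise) or indefinitely when idle, and xfs_ail_push() wakes it directly instead of juggling XFS_AIL_PUSHING_BIT. A pthread-based miniature of that loop, with pthreads standing in for kthread_run()/wake_up_process()/kthread_stop():

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wake = PTHREAD_COND_INITIALIZER;
static bool should_stop;

static long push_ail(void)
{
	puts("pushing the AIL");
	return 50;	/* ms until the next push; 0 means go idle */
}

static void *aild(void *arg)
{
	long tout = 0;	/* milliseconds, like xfsaild_push()'s return */

	(void)arg;
	pthread_mutex_lock(&lock);
	while (!should_stop) {
		if (tout) {
			struct timespec ts;

			/* sleep tout ms, but wake early on a signal */
			clock_gettime(CLOCK_REALTIME, &ts);
			ts.tv_nsec += (tout % 1000) * 1000000L;
			ts.tv_sec += tout / 1000 + ts.tv_nsec / 1000000000L;
			ts.tv_nsec %= 1000000000L;
			pthread_cond_timedwait(&wake, &lock, &ts);
		} else {
			/* idle until someone pushes the AIL target */
			pthread_cond_wait(&wake, &lock);
		}
		if (should_stop)
			break;
		pthread_mutex_unlock(&lock);
		tout = push_ail();
		pthread_mutex_lock(&lock);
	}
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t task;

	pthread_create(&task, NULL, aild, NULL);	/* ~ kthread_run() */
	usleep(10 * 1000);
	pthread_cond_signal(&wake);			/* ~ xfs_ail_push() */
	usleep(120 * 1000);
	pthread_mutex_lock(&lock);
	should_stop = true;				/* ~ kthread_stop() */
	pthread_cond_signal(&wake);
	pthread_mutex_unlock(&lock);
	pthread_join(task, NULL);
	return 0;
}
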
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 212946b97239..22750b5e4a8f 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h | |||
@@ -64,23 +64,17 @@ struct xfs_ail_cursor { | |||
64 | */ | 64 | */ |
65 | struct xfs_ail { | 65 | struct xfs_ail { |
66 | struct xfs_mount *xa_mount; | 66 | struct xfs_mount *xa_mount; |
67 | struct task_struct *xa_task; | ||
67 | struct list_head xa_ail; | 68 | struct list_head xa_ail; |
68 | xfs_lsn_t xa_target; | 69 | xfs_lsn_t xa_target; |
69 | struct list_head xa_cursors; | 70 | struct list_head xa_cursors; |
70 | spinlock_t xa_lock; | 71 | spinlock_t xa_lock; |
71 | struct delayed_work xa_work; | ||
72 | xfs_lsn_t xa_last_pushed_lsn; | 72 | xfs_lsn_t xa_last_pushed_lsn; |
73 | unsigned long xa_flags; | ||
74 | }; | 73 | }; |
75 | 74 | ||
76 | #define XFS_AIL_PUSHING_BIT 0 | ||
77 | |||
78 | /* | 75 | /* |
79 | * From xfs_trans_ail.c | 76 | * From xfs_trans_ail.c |
80 | */ | 77 | */ |
81 | |||
82 | extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ | ||
83 | |||
84 | void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, | 78 | void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, |
85 | struct xfs_ail_cursor *cur, | 79 | struct xfs_ail_cursor *cur, |
86 | struct xfs_log_item **log_items, int nr_items, | 80 | struct xfs_log_item **log_items, int nr_items, |