Diffstat (limited to 'fs')
46 files changed, 5740 insertions, 1710 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 40e6ac08c21f..c0ddfd29c5e5 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
| @@ -7,6 +7,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ | |||
| 7 | extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ | 7 | extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ |
| 8 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ | 8 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ |
| 9 | export.o tree-log.o free-space-cache.o zlib.o lzo.o \ | 9 | export.o tree-log.o free-space-cache.o zlib.o lzo.o \ |
| 10 | compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o | 10 | compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ |
| 11 | reada.o backref.o | ||
| 11 | 12 | ||
| 12 | btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o | 13 | btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o |
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index eb159aaa5a11..89b156d85d63 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
| @@ -59,22 +59,19 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type) | |||
| 59 | if (!value) | 59 | if (!value) |
| 60 | return ERR_PTR(-ENOMEM); | 60 | return ERR_PTR(-ENOMEM); |
| 61 | size = __btrfs_getxattr(inode, name, value, size); | 61 | size = __btrfs_getxattr(inode, name, value, size); |
| 62 | if (size > 0) { | 62 | } |
| 63 | acl = posix_acl_from_xattr(value, size); | 63 | if (size > 0) { |
| 64 | if (IS_ERR(acl)) { | 64 | acl = posix_acl_from_xattr(value, size); |
| 65 | kfree(value); | ||
| 66 | return acl; | ||
| 67 | } | ||
| 68 | set_cached_acl(inode, type, acl); | ||
| 69 | } | ||
| 70 | kfree(value); | ||
| 71 | } else if (size == -ENOENT || size == -ENODATA || size == 0) { | 65 | } else if (size == -ENOENT || size == -ENODATA || size == 0) { |
| 72 | /* FIXME, who returns -ENOENT? I think nobody */ | 66 | /* FIXME, who returns -ENOENT? I think nobody */ |
| 73 | acl = NULL; | 67 | acl = NULL; |
| 74 | set_cached_acl(inode, type, acl); | ||
| 75 | } else { | 68 | } else { |
| 76 | acl = ERR_PTR(-EIO); | 69 | acl = ERR_PTR(-EIO); |
| 77 | } | 70 | } |
| 71 | kfree(value); | ||
| 72 | |||
| 73 | if (!IS_ERR(acl)) | ||
| 74 | set_cached_acl(inode, type, acl); | ||
| 78 | 75 | ||
| 79 | return acl; | 76 | return acl; |
| 80 | } | 77 | } |
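Pieced together from the right-hand column, the patched tail of btrfs_get_acl() reads as below (a sketch; the __btrfs_getxattr() setup above it is elided). The restructuring leaves exactly one kfree() and one cache update, and errors are never cached:

        if (size > 0) {
                acl = posix_acl_from_xattr(value, size);
        } else if (size == -ENOENT || size == -ENODATA || size == 0) {
                /* FIXME, who returns -ENOENT? I think nobody */
                acl = NULL;
        } else {
                acl = ERR_PTR(-EIO);
        }
        kfree(value);

        if (!IS_ERR(acl))
                set_cached_acl(inode, type, acl);

        return acl;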
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
new file mode 100644
index 000000000000..8855aad3929c
--- /dev/null
+++ b/fs/btrfs/backref.c
| @@ -0,0 +1,776 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2011 STRATO. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include "ctree.h" | ||
| 20 | #include "disk-io.h" | ||
| 21 | #include "backref.h" | ||
| 22 | |||
| 23 | struct __data_ref { | ||
| 24 | struct list_head list; | ||
| 25 | u64 inum; | ||
| 26 | u64 root; | ||
| 27 | u64 extent_data_item_offset; | ||
| 28 | }; | ||
| 29 | |||
| 30 | struct __shared_ref { | ||
| 31 | struct list_head list; | ||
| 32 | u64 disk_byte; | ||
| 33 | }; | ||
| 34 | |||
| 35 | static int __inode_info(u64 inum, u64 ioff, u8 key_type, | ||
| 36 | struct btrfs_root *fs_root, struct btrfs_path *path, | ||
| 37 | struct btrfs_key *found_key) | ||
| 38 | { | ||
| 39 | int ret; | ||
| 40 | struct btrfs_key key; | ||
| 41 | struct extent_buffer *eb; | ||
| 42 | |||
| 43 | key.type = key_type; | ||
| 44 | key.objectid = inum; | ||
| 45 | key.offset = ioff; | ||
| 46 | |||
| 47 | ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0); | ||
| 48 | if (ret < 0) | ||
| 49 | return ret; | ||
| 50 | |||
| 51 | eb = path->nodes[0]; | ||
| 52 | if (ret && path->slots[0] >= btrfs_header_nritems(eb)) { | ||
| 53 | ret = btrfs_next_leaf(fs_root, path); | ||
| 54 | if (ret) | ||
| 55 | return ret; | ||
| 56 | eb = path->nodes[0]; | ||
| 57 | } | ||
| 58 | |||
| 59 | btrfs_item_key_to_cpu(eb, found_key, path->slots[0]); | ||
| 60 | if (found_key->type != key.type || found_key->objectid != key.objectid) | ||
| 61 | return 1; | ||
| 62 | |||
| 63 | return 0; | ||
| 64 | } | ||
| 65 | |||
| 66 | /* | ||
| 67 | * this makes the path point to (inum INODE_ITEM ioff) | ||
| 68 | */ | ||
| 69 | int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root, | ||
| 70 | struct btrfs_path *path) | ||
| 71 | { | ||
| 72 | struct btrfs_key key; | ||
| 73 | return __inode_info(inum, ioff, BTRFS_INODE_ITEM_KEY, fs_root, path, | ||
| 74 | &key); | ||
| 75 | } | ||
| 76 | |||
| 77 | static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root, | ||
| 78 | struct btrfs_path *path, | ||
| 79 | struct btrfs_key *found_key) | ||
| 80 | { | ||
| 81 | return __inode_info(inum, ioff, BTRFS_INODE_REF_KEY, fs_root, path, | ||
| 82 | found_key); | ||
| 83 | } | ||
| 84 | |||
| 85 | /* | ||
| 86 | * this iterates to turn a btrfs_inode_ref into a full filesystem path. elements | ||
| 87 | * of the path are separated by '/' and the path is guaranteed to be | ||
| 88 | * 0-terminated. the path is only given within the current file system. | ||
| 89 | * Therefore, it never starts with a '/'. the caller is responsible for providing | ||
| 90 | * "size" bytes in "dest". the dest buffer will be filled backwards. finally, | ||
| 91 | * the start point of the resulting string is returned. this pointer is within | ||
| 92 | * dest, normally. | ||
| 93 | * in case the path buffer would overflow, the pointer is decremented further | ||
| 94 | * as if output was written to the buffer, though no more output is actually | ||
| 95 | * generated. that way, the caller can determine how much space would be | ||
| 96 | * required for the path to fit into the buffer. in that case, the returned | ||
| 97 | * value will be smaller than dest. callers must check this! | ||
| 98 | */ | ||
| 99 | static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, | ||
| 100 | struct btrfs_inode_ref *iref, | ||
| 101 | struct extent_buffer *eb_in, u64 parent, | ||
| 102 | char *dest, u32 size) | ||
| 103 | { | ||
| 104 | u32 len; | ||
| 105 | int slot; | ||
| 106 | u64 next_inum; | ||
| 107 | int ret; | ||
| 108 | s64 bytes_left = size - 1; | ||
| 109 | struct extent_buffer *eb = eb_in; | ||
| 110 | struct btrfs_key found_key; | ||
| 111 | |||
| 112 | if (bytes_left >= 0) | ||
| 113 | dest[bytes_left] = '\0'; | ||
| 114 | |||
| 115 | while (1) { | ||
| 116 | len = btrfs_inode_ref_name_len(eb, iref); | ||
| 117 | bytes_left -= len; | ||
| 118 | if (bytes_left >= 0) | ||
| 119 | read_extent_buffer(eb, dest + bytes_left, | ||
| 120 | (unsigned long)(iref + 1), len); | ||
| 121 | if (eb != eb_in) | ||
| 122 | free_extent_buffer(eb); | ||
| 123 | ret = inode_ref_info(parent, 0, fs_root, path, &found_key); | ||
| 124 | if (ret) | ||
| 125 | break; | ||
| 126 | next_inum = found_key.offset; | ||
| 127 | |||
| 128 | /* regular exit ahead */ | ||
| 129 | if (parent == next_inum) | ||
| 130 | break; | ||
| 131 | |||
| 132 | slot = path->slots[0]; | ||
| 133 | eb = path->nodes[0]; | ||
| 134 | /* make sure we can use eb after releasing the path */ | ||
| 135 | if (eb != eb_in) | ||
| 136 | atomic_inc(&eb->refs); | ||
| 137 | btrfs_release_path(path); | ||
| 138 | |||
| 139 | iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); | ||
| 140 | parent = next_inum; | ||
| 141 | --bytes_left; | ||
| 142 | if (bytes_left >= 0) | ||
| 143 | dest[bytes_left] = '/'; | ||
| 144 | } | ||
| 145 | |||
| 146 | btrfs_release_path(path); | ||
| 147 | |||
| 148 | if (ret) | ||
| 149 | return ERR_PTR(ret); | ||
| 150 | |||
| 151 | return dest + bytes_left; | ||
| 152 | } | ||
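A caller sketch for the contract spelled out above (hypothetical: buf and buf_len stand for a preallocated scratch buffer; fs_root, path, iref, eb and parent come from an inode-ref lookup). The buffer fills from the end, and a return value below the buffer start signals truncation:

        char *name;

        name = iref_to_path(fs_root, path, iref, eb, parent, buf, buf_len);
        if (IS_ERR(name))
                return PTR_ERR(name);
        if (name < buf)
                /* truncated: buf - name more bytes would have been needed */
                return -ENAMETOOLONG;
        printk(KERN_DEBUG "resolved path: %s\n", name);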
| 153 | |||
| 154 | /* | ||
| 155 | * this makes the path point to (logical EXTENT_ITEM *) | ||
| 156 | * returns BTRFS_EXTENT_FLAG_DATA for data, BTRFS_EXTENT_FLAG_TREE_BLOCK for | ||
| 157 | * tree blocks and <0 on error. | ||
| 158 | */ | ||
| 159 | int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, | ||
| 160 | struct btrfs_path *path, struct btrfs_key *found_key) | ||
| 161 | { | ||
| 162 | int ret; | ||
| 163 | u64 flags; | ||
| 164 | u32 item_size; | ||
| 165 | struct extent_buffer *eb; | ||
| 166 | struct btrfs_extent_item *ei; | ||
| 167 | struct btrfs_key key; | ||
| 168 | |||
| 169 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
| 170 | key.objectid = logical; | ||
| 171 | key.offset = (u64)-1; | ||
| 172 | |||
| 173 | ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0); | ||
| 174 | if (ret < 0) | ||
| 175 | return ret; | ||
| 176 | ret = btrfs_previous_item(fs_info->extent_root, path, | ||
| 177 | 0, BTRFS_EXTENT_ITEM_KEY); | ||
| 178 | if (ret < 0) | ||
| 179 | return ret; | ||
| 180 | |||
| 181 | btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]); | ||
| 182 | if (found_key->type != BTRFS_EXTENT_ITEM_KEY || | ||
| 183 | found_key->objectid > logical || | ||
| 184 | found_key->objectid + found_key->offset <= logical) | ||
| 185 | return -ENOENT; | ||
| 186 | |||
| 187 | eb = path->nodes[0]; | ||
| 188 | item_size = btrfs_item_size_nr(eb, path->slots[0]); | ||
| 189 | BUG_ON(item_size < sizeof(*ei)); | ||
| 190 | |||
| 191 | ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); | ||
| 192 | flags = btrfs_extent_flags(eb, ei); | ||
| 193 | |||
| 194 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) | ||
| 195 | return BTRFS_EXTENT_FLAG_TREE_BLOCK; | ||
| 196 | if (flags & BTRFS_EXTENT_FLAG_DATA) | ||
| 197 | return BTRFS_EXTENT_FLAG_DATA; | ||
| 198 | |||
| 199 | return -EIO; | ||
| 200 | } | ||
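A minimal dispatch sketch on top of extent_from_logical(), assuming fs_info, logical and an allocated path are at hand; the positive return value doubles as the extent type:

        struct btrfs_key found_key;
        int ret;

        ret = extent_from_logical(fs_info, logical, path, &found_key);
        if (ret < 0)
                return ret;

        if (ret == BTRFS_EXTENT_FLAG_TREE_BLOCK) {
                /* metadata: walk the refs with tree_backref_for_extent() */
        } else {
                /* BTRFS_EXTENT_FLAG_DATA: resolve the owning inodes with
                 * iterate_extent_inodes() */
        }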
| 201 | |||
| 202 | /* | ||
| 203 | * helper function to iterate extent inline refs. ptr must point to a 0 value | ||
| 204 | * for the first call and may be modified. it is used to track state. | ||
| 205 | * if more refs exist, 0 is returned and the next call to | ||
| 206 | * __get_extent_inline_ref must pass the modified ptr parameter to get the | ||
| 207 | * next ref. after the last ref was processed, 1 is returned. | ||
| 208 | * returns <0 on error | ||
| 209 | */ | ||
| 210 | static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb, | ||
| 211 | struct btrfs_extent_item *ei, u32 item_size, | ||
| 212 | struct btrfs_extent_inline_ref **out_eiref, | ||
| 213 | int *out_type) | ||
| 214 | { | ||
| 215 | unsigned long end; | ||
| 216 | u64 flags; | ||
| 217 | struct btrfs_tree_block_info *info; | ||
| 218 | |||
| 219 | if (!*ptr) { | ||
| 220 | /* first call */ | ||
| 221 | flags = btrfs_extent_flags(eb, ei); | ||
| 222 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | ||
| 223 | info = (struct btrfs_tree_block_info *)(ei + 1); | ||
| 224 | *out_eiref = | ||
| 225 | (struct btrfs_extent_inline_ref *)(info + 1); | ||
| 226 | } else { | ||
| 227 | *out_eiref = (struct btrfs_extent_inline_ref *)(ei + 1); | ||
| 228 | } | ||
| 229 | *ptr = (unsigned long)*out_eiref; | ||
| 230 | if ((void *)*ptr >= (void *)ei + item_size) | ||
| 231 | return -ENOENT; | ||
| 232 | } | ||
| 233 | |||
| 234 | end = (unsigned long)ei + item_size; | ||
| 235 | *out_eiref = (struct btrfs_extent_inline_ref *)*ptr; | ||
| 236 | *out_type = btrfs_extent_inline_ref_type(eb, *out_eiref); | ||
| 237 | |||
| 238 | *ptr += btrfs_extent_inline_ref_size(*out_type); | ||
| 239 | WARN_ON(*ptr > end); | ||
| 240 | if (*ptr == end) | ||
| 241 | return 1; /* last */ | ||
| 242 | |||
| 243 | return 0; | ||
| 244 | } | ||
| 245 | |||
| 246 | /* | ||
| 247 | * reads the tree block backref for an extent. tree level and root are returned | ||
| 248 | * through out_level and out_root. ptr must point to a 0 value for the first | ||
| 249 | * call and may be modified (see __get_extent_inline_ref comment). | ||
| 250 | * returns 0 if data was provided, 1 if there was no more data to provide or | ||
| 251 | * <0 on error. | ||
| 252 | */ | ||
| 253 | int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, | ||
| 254 | struct btrfs_extent_item *ei, u32 item_size, | ||
| 255 | u64 *out_root, u8 *out_level) | ||
| 256 | { | ||
| 257 | int ret; | ||
| 258 | int type; | ||
| 259 | struct btrfs_tree_block_info *info; | ||
| 260 | struct btrfs_extent_inline_ref *eiref; | ||
| 261 | |||
| 262 | if (*ptr == (unsigned long)-1) | ||
| 263 | return 1; | ||
| 264 | |||
| 265 | while (1) { | ||
| 266 | ret = __get_extent_inline_ref(ptr, eb, ei, item_size, | ||
| 267 | &eiref, &type); | ||
| 268 | if (ret < 0) | ||
| 269 | return ret; | ||
| 270 | |||
| 271 | if (type == BTRFS_TREE_BLOCK_REF_KEY || | ||
| 272 | type == BTRFS_SHARED_BLOCK_REF_KEY) | ||
| 273 | break; | ||
| 274 | |||
| 275 | if (ret == 1) | ||
| 276 | return 1; | ||
| 277 | } | ||
| 278 | |||
| 279 | /* we can treat both ref types equally here */ | ||
| 280 | info = (struct btrfs_tree_block_info *)(ei + 1); | ||
| 281 | *out_root = btrfs_extent_inline_ref_offset(eb, eiref); | ||
| 282 | *out_level = btrfs_tree_block_level(eb, info); | ||
| 283 | |||
| 284 | if (ret == 1) | ||
| 285 | *ptr = (unsigned long)-1; | ||
| 286 | |||
| 287 | return 0; | ||
| 288 | } | ||
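The iteration protocol in practice (a sketch; eb, ei and item_size are assumed to come from a prior extent_from_logical() lookup): start with ptr == 0 and keep calling until the function returns 1.

        unsigned long ptr = 0;
        u64 root;
        u8 level;
        int ret;

        do {
                ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
                                              &root, &level);
                if (ret < 0)
                        return ret;
                if (ret == 0)
                        printk(KERN_DEBUG "ref from root %llu, level %d\n",
                               (unsigned long long)root, (int)level);
        } while (ret == 0);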
| 289 | |||
| 290 | static int __data_list_add(struct list_head *head, u64 inum, | ||
| 291 | u64 extent_data_item_offset, u64 root) | ||
| 292 | { | ||
| 293 | struct __data_ref *ref; | ||
| 294 | |||
| 295 | ref = kmalloc(sizeof(*ref), GFP_NOFS); | ||
| 296 | if (!ref) | ||
| 297 | return -ENOMEM; | ||
| 298 | |||
| 299 | ref->inum = inum; | ||
| 300 | ref->extent_data_item_offset = extent_data_item_offset; | ||
| 301 | ref->root = root; | ||
| 302 | list_add_tail(&ref->list, head); | ||
| 303 | |||
| 304 | return 0; | ||
| 305 | } | ||
| 306 | |||
| 307 | static int __data_list_add_eb(struct list_head *head, struct extent_buffer *eb, | ||
| 308 | struct btrfs_extent_data_ref *dref) | ||
| 309 | { | ||
| 310 | return __data_list_add(head, btrfs_extent_data_ref_objectid(eb, dref), | ||
| 311 | btrfs_extent_data_ref_offset(eb, dref), | ||
| 312 | btrfs_extent_data_ref_root(eb, dref)); | ||
| 313 | } | ||
| 314 | |||
| 315 | static int __shared_list_add(struct list_head *head, u64 disk_byte) | ||
| 316 | { | ||
| 317 | struct __shared_ref *ref; | ||
| 318 | |||
| 319 | ref = kmalloc(sizeof(*ref), GFP_NOFS); | ||
| 320 | if (!ref) | ||
| 321 | return -ENOMEM; | ||
| 322 | |||
| 323 | ref->disk_byte = disk_byte; | ||
| 324 | list_add_tail(&ref->list, head); | ||
| 325 | |||
| 326 | return 0; | ||
| 327 | } | ||
| 328 | |||
| 329 | static int __iter_shared_inline_ref_inodes(struct btrfs_fs_info *fs_info, | ||
| 330 | u64 logical, u64 inum, | ||
| 331 | u64 extent_data_item_offset, | ||
| 332 | u64 extent_offset, | ||
| 333 | struct btrfs_path *path, | ||
| 334 | struct list_head *data_refs, | ||
| 335 | iterate_extent_inodes_t *iterate, | ||
| 336 | void *ctx) | ||
| 337 | { | ||
| 338 | u64 ref_root; | ||
| 339 | u32 item_size; | ||
| 340 | struct btrfs_key key; | ||
| 341 | struct extent_buffer *eb; | ||
| 342 | struct btrfs_extent_item *ei; | ||
| 343 | struct btrfs_extent_inline_ref *eiref; | ||
| 344 | struct __data_ref *ref; | ||
| 345 | int ret; | ||
| 346 | int type; | ||
| 347 | int last; | ||
| 348 | unsigned long ptr = 0; | ||
| 349 | |||
| 350 | WARN_ON(!list_empty(data_refs)); | ||
| 351 | ret = extent_from_logical(fs_info, logical, path, &key); | ||
| 352 | if (ret & BTRFS_EXTENT_FLAG_DATA) | ||
| 353 | ret = -EIO; | ||
| 354 | if (ret < 0) | ||
| 355 | goto out; | ||
| 356 | |||
| 357 | eb = path->nodes[0]; | ||
| 358 | ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); | ||
| 359 | item_size = btrfs_item_size_nr(eb, path->slots[0]); | ||
| 360 | |||
| 361 | ret = 0; | ||
| 362 | ref_root = 0; | ||
| 363 | /* | ||
| 364 | * as done in iterate_extent_inodes, we first build a list of refs to | ||
| 365 | * iterate, then free the path and then iterate them to avoid deadlocks. | ||
| 366 | */ | ||
| 367 | do { | ||
| 368 | last = __get_extent_inline_ref(&ptr, eb, ei, item_size, | ||
| 369 | &eiref, &type); | ||
| 370 | if (last < 0) { | ||
| 371 | ret = last; | ||
| 372 | goto out; | ||
| 373 | } | ||
| 374 | if (type == BTRFS_TREE_BLOCK_REF_KEY || | ||
| 375 | type == BTRFS_SHARED_BLOCK_REF_KEY) { | ||
| 376 | ref_root = btrfs_extent_inline_ref_offset(eb, eiref); | ||
| 377 | ret = __data_list_add(data_refs, inum, | ||
| 378 | extent_data_item_offset, | ||
| 379 | ref_root); | ||
| 380 | } | ||
| 381 | } while (!ret && !last); | ||
| 382 | |||
| 383 | btrfs_release_path(path); | ||
| 384 | |||
| 385 | if (ref_root == 0) { | ||
| 386 | printk(KERN_ERR "btrfs: failed to find tree block ref " | ||
| 387 | "for shared data backref %llu\n", logical); | ||
| 388 | WARN_ON(1); | ||
| 389 | ret = -EIO; | ||
| 390 | } | ||
| 391 | |||
| 392 | out: | ||
| 393 | while (!list_empty(data_refs)) { | ||
| 394 | ref = list_first_entry(data_refs, struct __data_ref, list); | ||
| 395 | list_del(&ref->list); | ||
| 396 | if (!ret) | ||
| 397 | ret = iterate(ref->inum, extent_offset + | ||
| 398 | ref->extent_data_item_offset, | ||
| 399 | ref->root, ctx); | ||
| 400 | kfree(ref); | ||
| 401 | } | ||
| 402 | |||
| 403 | return ret; | ||
| 404 | } | ||
| 405 | |||
| 406 | static int __iter_shared_inline_ref(struct btrfs_fs_info *fs_info, | ||
| 407 | u64 logical, u64 orig_extent_item_objectid, | ||
| 408 | u64 extent_offset, struct btrfs_path *path, | ||
| 409 | struct list_head *data_refs, | ||
| 410 | iterate_extent_inodes_t *iterate, | ||
| 411 | void *ctx) | ||
| 412 | { | ||
| 413 | u64 disk_byte; | ||
| 414 | struct btrfs_key key; | ||
| 415 | struct btrfs_file_extent_item *fi; | ||
| 416 | struct extent_buffer *eb; | ||
| 417 | int slot; | ||
| 418 | int nritems; | ||
| 419 | int ret; | ||
| 420 | int found = 0; | ||
| 421 | |||
| 422 | eb = read_tree_block(fs_info->tree_root, logical, | ||
| 423 | fs_info->tree_root->leafsize, 0); | ||
| 424 | if (!eb) | ||
| 425 | return -EIO; | ||
| 426 | |||
| 427 | /* | ||
| 428 | * from the shared data ref, we only have the leaf but we need | ||
| 429 | * the key. thus, we must look into all items and see that we | ||
| 430 | * find one (some) with a reference to our extent item. | ||
| 431 | */ | ||
| 432 | nritems = btrfs_header_nritems(eb); | ||
| 433 | for (slot = 0; slot < nritems; ++slot) { | ||
| 434 | btrfs_item_key_to_cpu(eb, &key, slot); | ||
| 435 | if (key.type != BTRFS_EXTENT_DATA_KEY) | ||
| 436 | continue; | ||
| 437 | fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); | ||
| 438 | if (!fi) { | ||
| 439 | free_extent_buffer(eb); | ||
| 440 | return -EIO; | ||
| 441 | } | ||
| 442 | disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); | ||
| 443 | if (disk_byte != orig_extent_item_objectid) { | ||
| 444 | if (found) | ||
| 445 | break; | ||
| 446 | else | ||
| 447 | continue; | ||
| 448 | } | ||
| 449 | ++found; | ||
| 450 | ret = __iter_shared_inline_ref_inodes(fs_info, logical, | ||
| 451 | key.objectid, | ||
| 452 | key.offset, | ||
| 453 | extent_offset, path, | ||
| 454 | data_refs, | ||
| 455 | iterate, ctx); | ||
| 456 | if (ret) | ||
| 457 | break; | ||
| 458 | } | ||
| 459 | |||
| 460 | if (!found) { | ||
| 461 | printk(KERN_ERR "btrfs: failed to follow shared data backref " | ||
| 462 | "to parent %llu\n", logical); | ||
| 463 | WARN_ON(1); | ||
| 464 | ret = -EIO; | ||
| 465 | } | ||
| 466 | |||
| 467 | free_extent_buffer(eb); | ||
| 468 | return ret; | ||
| 469 | } | ||
| 470 | |||
| 471 | /* | ||
| 472 | * calls iterate() for every inode that references the extent identified by | ||
| 473 | * the given parameters. will use the path given as a parameter and return it | ||
| 474 | * released. | ||
| 475 | * when the iterator function returns a non-zero value, iteration stops. | ||
| 476 | */ | ||
| 477 | int iterate_extent_inodes(struct btrfs_fs_info *fs_info, | ||
| 478 | struct btrfs_path *path, | ||
| 479 | u64 extent_item_objectid, | ||
| 480 | u64 extent_offset, | ||
| 481 | iterate_extent_inodes_t *iterate, void *ctx) | ||
| 482 | { | ||
| 483 | unsigned long ptr = 0; | ||
| 484 | int last; | ||
| 485 | int ret; | ||
| 486 | int type; | ||
| 487 | u64 logical; | ||
| 488 | u32 item_size; | ||
| 489 | struct btrfs_extent_inline_ref *eiref; | ||
| 490 | struct btrfs_extent_data_ref *dref; | ||
| 491 | struct extent_buffer *eb; | ||
| 492 | struct btrfs_extent_item *ei; | ||
| 493 | struct btrfs_key key; | ||
| 494 | struct list_head data_refs = LIST_HEAD_INIT(data_refs); | ||
| 495 | struct list_head shared_refs = LIST_HEAD_INIT(shared_refs); | ||
| 496 | struct __data_ref *ref_d; | ||
| 497 | struct __shared_ref *ref_s; | ||
| 498 | |||
| 499 | eb = path->nodes[0]; | ||
| 500 | ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); | ||
| 501 | item_size = btrfs_item_size_nr(eb, path->slots[0]); | ||
| 502 | |||
| 503 | /* first we iterate the inline refs, ... */ | ||
| 504 | do { | ||
| 505 | last = __get_extent_inline_ref(&ptr, eb, ei, item_size, | ||
| 506 | &eiref, &type); | ||
| 507 | if (last == -ENOENT) { | ||
| 508 | ret = 0; | ||
| 509 | break; | ||
| 510 | } | ||
| 511 | if (last < 0) { | ||
| 512 | ret = last; | ||
| 513 | break; | ||
| 514 | } | ||
| 515 | |||
| 516 | if (type == BTRFS_EXTENT_DATA_REF_KEY) { | ||
| 517 | dref = (struct btrfs_extent_data_ref *)(&eiref->offset); | ||
| 518 | ret = __data_list_add_eb(&data_refs, eb, dref); | ||
| 519 | } else if (type == BTRFS_SHARED_DATA_REF_KEY) { | ||
| 520 | logical = btrfs_extent_inline_ref_offset(eb, eiref); | ||
| 521 | ret = __shared_list_add(&shared_refs, logical); | ||
| 522 | } | ||
| 523 | } while (!ret && !last); | ||
| 524 | |||
| 525 | /* ... then we proceed to in-tree references and ... */ | ||
| 526 | while (!ret) { | ||
| 527 | ++path->slots[0]; | ||
| 528 | if (path->slots[0] > btrfs_header_nritems(eb)) { | ||
| 529 | ret = btrfs_next_leaf(fs_info->extent_root, path); | ||
| 530 | if (ret) { | ||
| 531 | if (ret == 1) | ||
| 532 | ret = 0; /* we're done */ | ||
| 533 | break; | ||
| 534 | } | ||
| 535 | eb = path->nodes[0]; | ||
| 536 | } | ||
| 537 | btrfs_item_key_to_cpu(eb, &key, path->slots[0]); | ||
| 538 | if (key.objectid != extent_item_objectid) | ||
| 539 | break; | ||
| 540 | if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { | ||
| 541 | dref = btrfs_item_ptr(eb, path->slots[0], | ||
| 542 | struct btrfs_extent_data_ref); | ||
| 543 | ret = __data_list_add_eb(&data_refs, eb, dref); | ||
| 544 | } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { | ||
| 545 | ret = __shared_list_add(&shared_refs, key.offset); | ||
| 546 | } | ||
| 547 | } | ||
| 548 | |||
| 549 | btrfs_release_path(path); | ||
| 550 | |||
| 551 | /* | ||
| 552 | * ... only at the very end we can process the refs we found. this is | ||
| 553 | * because the iterator function we call is allowed to make tree lookups | ||
| 554 | * and we have to avoid deadlocks. additionally, we need more tree | ||
| 555 | * lookups ourselves for shared data refs. | ||
| 556 | */ | ||
| 557 | while (!list_empty(&data_refs)) { | ||
| 558 | ref_d = list_first_entry(&data_refs, struct __data_ref, list); | ||
| 559 | list_del(&ref_d->list); | ||
| 560 | if (!ret) | ||
| 561 | ret = iterate(ref_d->inum, extent_offset + | ||
| 562 | ref_d->extent_data_item_offset, | ||
| 563 | ref_d->root, ctx); | ||
| 564 | kfree(ref_d); | ||
| 565 | } | ||
| 566 | |||
| 567 | while (!list_empty(&shared_refs)) { | ||
| 568 | ref_s = list_first_entry(&shared_refs, struct __shared_ref, | ||
| 569 | list); | ||
| 570 | list_del(&ref_s->list); | ||
| 571 | if (!ret) | ||
| 572 | ret = __iter_shared_inline_ref(fs_info, | ||
| 573 | ref_s->disk_byte, | ||
| 574 | extent_item_objectid, | ||
| 575 | extent_offset, path, | ||
| 576 | &data_refs, | ||
| 577 | iterate, ctx); | ||
| 578 | kfree(ref_s); | ||
| 579 | } | ||
| 580 | |||
| 581 | return ret; | ||
| 582 | } | ||
| 583 | |||
| 584 | int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, | ||
| 585 | struct btrfs_path *path, | ||
| 586 | iterate_extent_inodes_t *iterate, void *ctx) | ||
| 587 | { | ||
| 588 | int ret; | ||
| 589 | u64 offset; | ||
| 590 | struct btrfs_key found_key; | ||
| 591 | |||
| 592 | ret = extent_from_logical(fs_info, logical, path, | ||
| 593 | &found_key); | ||
| 594 | if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) | ||
| 595 | ret = -EINVAL; | ||
| 596 | if (ret < 0) | ||
| 597 | return ret; | ||
| 598 | |||
| 599 | offset = logical - found_key.objectid; | ||
| 600 | ret = iterate_extent_inodes(fs_info, path, found_key.objectid, | ||
| 601 | offset, iterate, ctx); | ||
| 602 | |||
| 603 | return ret; | ||
| 604 | } | ||
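Putting the data side together (a sketch; dump_extent_owner is an illustrative name, not part of the patch): a callback matching iterate_extent_inodes_t, driven from the logical address of a data extent.

        static int dump_extent_owner(u64 inum, u64 offset, u64 root, void *ctx)
        {
                printk(KERN_DEBUG "inode %llu, file offset %llu, root %llu\n",
                       (unsigned long long)inum, (unsigned long long)offset,
                       (unsigned long long)root);
                return 0;       /* non-zero would stop the iteration */
        }

        /* e.g. when reporting a bad block found at 'logical': */
        ret = iterate_inodes_from_logical(logical, fs_info, path,
                                          dump_extent_owner, NULL);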
| 605 | |||
| 606 | static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, | ||
| 607 | struct btrfs_path *path, | ||
| 608 | iterate_irefs_t *iterate, void *ctx) | ||
| 609 | { | ||
| 610 | int ret; | ||
| 611 | int slot; | ||
| 612 | u32 cur; | ||
| 613 | u32 len; | ||
| 614 | u32 name_len; | ||
| 615 | u64 parent = 0; | ||
| 616 | int found = 0; | ||
| 617 | struct extent_buffer *eb; | ||
| 618 | struct btrfs_item *item; | ||
| 619 | struct btrfs_inode_ref *iref; | ||
| 620 | struct btrfs_key found_key; | ||
| 621 | |||
| 622 | while (1) { | ||
| 623 | ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path, | ||
| 624 | &found_key); | ||
| 625 | if (ret < 0) | ||
| 626 | break; | ||
| 627 | if (ret) { | ||
| 628 | ret = found ? 0 : -ENOENT; | ||
| 629 | break; | ||
| 630 | } | ||
| 631 | ++found; | ||
| 632 | |||
| 633 | parent = found_key.offset; | ||
| 634 | slot = path->slots[0]; | ||
| 635 | eb = path->nodes[0]; | ||
| 636 | /* make sure we can use eb after releasing the path */ | ||
| 637 | atomic_inc(&eb->refs); | ||
| 638 | btrfs_release_path(path); | ||
| 639 | |||
| 640 | item = btrfs_item_nr(eb, slot); | ||
| 641 | iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); | ||
| 642 | |||
| 643 | for (cur = 0; cur < btrfs_item_size(eb, item); cur += len) { | ||
| 644 | name_len = btrfs_inode_ref_name_len(eb, iref); | ||
| 645 | /* path must be released before calling iterate()! */ | ||
| 646 | ret = iterate(parent, iref, eb, ctx); | ||
| 647 | if (ret) { | ||
| 648 | free_extent_buffer(eb); | ||
| 649 | break; | ||
| 650 | } | ||
| 651 | len = sizeof(*iref) + name_len; | ||
| 652 | iref = (struct btrfs_inode_ref *)((char *)iref + len); | ||
| 653 | } | ||
| 654 | free_extent_buffer(eb); | ||
| 655 | } | ||
| 656 | |||
| 657 | btrfs_release_path(path); | ||
| 658 | |||
| 659 | return ret; | ||
| 660 | } | ||
| 661 | |||
| 662 | /* | ||
| 663 | * returns 0 if the path could be dumped (possibly truncated) | ||
| 664 | * returns <0 in case of an error | ||
| 665 | */ | ||
| 666 | static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref, | ||
| 667 | struct extent_buffer *eb, void *ctx) | ||
| 668 | { | ||
| 669 | struct inode_fs_paths *ipath = ctx; | ||
| 670 | char *fspath; | ||
| 671 | char *fspath_min; | ||
| 672 | int i = ipath->fspath->elem_cnt; | ||
| 673 | const int s_ptr = sizeof(char *); | ||
| 674 | u32 bytes_left; | ||
| 675 | |||
| 676 | bytes_left = ipath->fspath->bytes_left > s_ptr ? | ||
| 677 | ipath->fspath->bytes_left - s_ptr : 0; | ||
| 678 | |||
| 679 | fspath_min = (char *)ipath->fspath->val + (i + 1) * s_ptr; | ||
| 680 | fspath = iref_to_path(ipath->fs_root, ipath->btrfs_path, iref, eb, | ||
| 681 | inum, fspath_min, bytes_left); | ||
| 682 | if (IS_ERR(fspath)) | ||
| 683 | return PTR_ERR(fspath); | ||
| 684 | |||
| 685 | if (fspath > fspath_min) { | ||
| 686 | ipath->fspath->val[i] = (u64)fspath; | ||
| 687 | ++ipath->fspath->elem_cnt; | ||
| 688 | ipath->fspath->bytes_left = fspath - fspath_min; | ||
| 689 | } else { | ||
| 690 | ++ipath->fspath->elem_missed; | ||
| 691 | ipath->fspath->bytes_missing += fspath_min - fspath; | ||
| 692 | ipath->fspath->bytes_left = 0; | ||
| 693 | } | ||
| 694 | |||
| 695 | return 0; | ||
| 696 | } | ||
| 697 | |||
| 698 | /* | ||
| 699 | * this dumps all file system paths to the inode into the ipath struct, provided | ||
| 700 | * it has been created large enough. each path is zero-terminated and accessed | ||
| 701 | * from ipath->fspath->val[i]. | ||
| 702 | * when it returns, there are ipath->fspath->elem_cnt number of paths available | ||
| 703 | * in ipath->fspath->val[]. when the allocated space wasn't sufficient, the | ||
| 704 | * number of missed paths is recorded in ipath->fspath->elem_missed; otherwise, | ||
| 705 | * it's zero. ipath->fspath->bytes_missing holds the number of bytes that would | ||
| 706 | * have been needed to return all paths. | ||
| 707 | */ | ||
| 708 | int paths_from_inode(u64 inum, struct inode_fs_paths *ipath) | ||
| 709 | { | ||
| 710 | return iterate_irefs(inum, ipath->fs_root, ipath->btrfs_path, | ||
| 711 | inode_to_path, ipath); | ||
| 712 | } | ||
| 713 | |||
| 714 | /* | ||
| 715 | * allocates a data container big enough to hold total_bytes. space usable | ||
| 716 | * for actual data will be total_bytes - sizeof(struct btrfs_data_container); | ||
| 717 | * if even the container header does not fit, the shortfall is recorded in | ||
| 718 | * bytes_missing. the returned pointer must be freed with kfree() in the end. | ||
| 719 | */ | ||
| 720 | struct btrfs_data_container *init_data_container(u32 total_bytes) | ||
| 721 | { | ||
| 722 | struct btrfs_data_container *data; | ||
| 723 | size_t alloc_bytes; | ||
| 724 | |||
| 725 | alloc_bytes = max_t(size_t, total_bytes, sizeof(*data)); | ||
| 726 | data = kmalloc(alloc_bytes, GFP_NOFS); | ||
| 727 | if (!data) | ||
| 728 | return ERR_PTR(-ENOMEM); | ||
| 729 | |||
| 730 | if (total_bytes >= sizeof(*data)) { | ||
| 731 | data->bytes_left = total_bytes - sizeof(*data); | ||
| 732 | data->bytes_missing = 0; | ||
| 733 | } else { | ||
| 734 | data->bytes_missing = sizeof(*data) - total_bytes; | ||
| 735 | data->bytes_left = 0; | ||
| 736 | } | ||
| 737 | |||
| 738 | data->elem_cnt = 0; | ||
| 739 | data->elem_missed = 0; | ||
| 740 | |||
| 741 | return data; | ||
| 742 | } | ||
| 743 | |||
| 744 | /* | ||
| 745 | * allocates space to return multiple file system paths for an inode. | ||
| 746 | * total_bytes to allocate are passed, note that space usable for actual path | ||
| 747 | * information will be total_bytes - sizeof(struct btrfs_data_container). | ||
| 748 | * the returned pointer must be freed with free_ipath() in the end. | ||
| 749 | */ | ||
| 750 | struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, | ||
| 751 | struct btrfs_path *path) | ||
| 752 | { | ||
| 753 | struct inode_fs_paths *ifp; | ||
| 754 | struct btrfs_data_container *fspath; | ||
| 755 | |||
| 756 | fspath = init_data_container(total_bytes); | ||
| 757 | if (IS_ERR(fspath)) | ||
| 758 | return (void *)fspath; | ||
| 759 | |||
| 760 | ifp = kmalloc(sizeof(*ifp), GFP_NOFS); | ||
| 761 | if (!ifp) { | ||
| 762 | kfree(fspath); | ||
| 763 | return ERR_PTR(-ENOMEM); | ||
| 764 | } | ||
| 765 | |||
| 766 | ifp->btrfs_path = path; | ||
| 767 | ifp->fspath = fspath; | ||
| 768 | ifp->fs_root = fs_root; | ||
| 769 | |||
| 770 | return ifp; | ||
| 771 | } | ||
| 772 | |||
| 773 | void free_ipath(struct inode_fs_paths *ipath) | ||
| 774 | { | ||
| 775 | kfree(ipath); | ||
| 776 | } | ||
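End-to-end use of the ipath interface above might look like this (a sketch: inum and fs_root are assumed, 4096 is an arbitrary container size, and error handling is abbreviated; note that free_ipath() in this version does not free the data container allocated by init_ipath(), so it is released separately):

        struct btrfs_path *path;
        struct inode_fs_paths *ipath;
        u32 i;
        int ret;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        ipath = init_ipath(4096, fs_root, path);
        if (IS_ERR(ipath)) {
                btrfs_free_path(path);
                return PTR_ERR(ipath);
        }

        ret = paths_from_inode(inum, ipath);
        for (i = 0; i < ipath->fspath->elem_cnt; ++i)
                printk(KERN_DEBUG "path %u: %s\n", i,
                       (char *)(unsigned long)ipath->fspath->val[i]);

        kfree(ipath->fspath);
        free_ipath(ipath);
        btrfs_free_path(path);
        return ret;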
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
new file mode 100644
index 000000000000..92618837cb8f
--- /dev/null
+++ b/fs/btrfs/backref.h
| @@ -0,0 +1,62 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2011 STRATO. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #ifndef __BTRFS_BACKREF__ | ||
| 20 | #define __BTRFS_BACKREF__ | ||
| 21 | |||
| 22 | #include "ioctl.h" | ||
| 23 | |||
| 24 | struct inode_fs_paths { | ||
| 25 | struct btrfs_path *btrfs_path; | ||
| 26 | struct btrfs_root *fs_root; | ||
| 27 | struct btrfs_data_container *fspath; | ||
| 28 | }; | ||
| 29 | |||
| 30 | typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root, | ||
| 31 | void *ctx); | ||
| 32 | typedef int (iterate_irefs_t)(u64 parent, struct btrfs_inode_ref *iref, | ||
| 33 | struct extent_buffer *eb, void *ctx); | ||
| 34 | |||
| 35 | int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root, | ||
| 36 | struct btrfs_path *path); | ||
| 37 | |||
| 38 | int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, | ||
| 39 | struct btrfs_path *path, struct btrfs_key *found_key); | ||
| 40 | |||
| 41 | int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, | ||
| 42 | struct btrfs_extent_item *ei, u32 item_size, | ||
| 43 | u64 *out_root, u8 *out_level); | ||
| 44 | |||
| 45 | int iterate_extent_inodes(struct btrfs_fs_info *fs_info, | ||
| 46 | struct btrfs_path *path, | ||
| 47 | u64 extent_item_objectid, | ||
| 48 | u64 extent_offset, | ||
| 49 | iterate_extent_inodes_t *iterate, void *ctx); | ||
| 50 | |||
| 51 | int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, | ||
| 52 | struct btrfs_path *path, | ||
| 53 | iterate_extent_inodes_t *iterate, void *ctx); | ||
| 54 | |||
| 55 | int paths_from_inode(u64 inum, struct inode_fs_paths *ipath); | ||
| 56 | |||
| 57 | struct btrfs_data_container *init_data_container(u32 total_bytes); | ||
| 58 | struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, | ||
| 59 | struct btrfs_path *path); | ||
| 60 | void free_ipath(struct inode_fs_paths *ipath); | ||
| 61 | |||
| 62 | #endif | ||
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index d9f99a16edd6..5a5d325a3935 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
| @@ -103,11 +103,6 @@ struct btrfs_inode { | |||
| 103 | */ | 103 | */ |
| 104 | u64 delalloc_bytes; | 104 | u64 delalloc_bytes; |
| 105 | 105 | ||
| 106 | /* total number of bytes that may be used for this inode for | ||
| 107 | * delalloc | ||
| 108 | */ | ||
| 109 | u64 reserved_bytes; | ||
| 110 | |||
| 111 | /* | 106 | /* |
| 112 | * the size of the file stored in the metadata on disk. data=ordered | 107 | * the size of the file stored in the metadata on disk. data=ordered |
| 113 | * means the in-memory i_size might be larger than the size on disk | 108 | * means the in-memory i_size might be larger than the size on disk |
| @@ -115,9 +110,6 @@ struct btrfs_inode { | |||
| 115 | */ | 110 | */ |
| 116 | u64 disk_i_size; | 111 | u64 disk_i_size; |
| 117 | 112 | ||
| 118 | /* flags field from the on disk inode */ | ||
| 119 | u32 flags; | ||
| 120 | |||
| 121 | /* | 113 | /* |
| 122 | * if this is a directory then index_cnt is the counter for the index | 114 | * if this is a directory then index_cnt is the counter for the index |
| 123 | * number for new files that are created | 115 | * number for new files that are created |
| @@ -132,6 +124,15 @@ struct btrfs_inode { | |||
| 132 | u64 last_unlink_trans; | 124 | u64 last_unlink_trans; |
| 133 | 125 | ||
| 134 | /* | 126 | /* |
| 127 | * Number of bytes outstanding that are going to need csums. This is | ||
| 128 | * used in ENOSPC accounting. | ||
| 129 | */ | ||
| 130 | u64 csum_bytes; | ||
| 131 | |||
| 132 | /* flags field from the on disk inode */ | ||
| 133 | u32 flags; | ||
| 134 | |||
| 135 | /* | ||
| 135 | * Counters to keep track of the number of extent item's we may use due | 136 | * Counters to keep track of the number of extent item's we may use due |
| 136 | * to delalloc and such. outstanding_extents is the number of extent | 137 | * to delalloc and such. outstanding_extents is the number of extent |
| 137 | * items we think we'll end up using, and reserved_extents is the number | 138 | * items we think we'll end up using, and reserved_extents is the number |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 8ec5d86f1734..14f1c5a0b2d2 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
| @@ -85,7 +85,8 @@ struct compressed_bio { | |||
| 85 | static inline int compressed_bio_size(struct btrfs_root *root, | 85 | static inline int compressed_bio_size(struct btrfs_root *root, |
| 86 | unsigned long disk_size) | 86 | unsigned long disk_size) |
| 87 | { | 87 | { |
| 88 | u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); | 88 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); |
| 89 | |||
| 89 | return sizeof(struct compressed_bio) + | 90 | return sizeof(struct compressed_bio) + |
| 90 | ((disk_size + root->sectorsize - 1) / root->sectorsize) * | 91 | ((disk_size + root->sectorsize - 1) / root->sectorsize) * |
| 91 | csum_size; | 92 | csum_size; |
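The functional change here is only the dereference: super_copy becomes a pointer in btrfs_fs_info (see the ctree.h hunk below). As a worked example of the unchanged size math, assuming the default 4 KiB sectorsize and crc32c's 4-byte checksums, a 12 KiB compressed extent needs sizeof(struct compressed_bio) + ((12288 + 4096 - 1) / 4096) * 4 = sizeof(struct compressed_bio) + 12 bytes: one checksum slot per on-disk sector.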
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 011cab3aca8d..0fe615e4ea38 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
| @@ -902,9 +902,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
| 902 | 902 | ||
| 903 | orig_ptr = btrfs_node_blockptr(mid, orig_slot); | 903 | orig_ptr = btrfs_node_blockptr(mid, orig_slot); |
| 904 | 904 | ||
| 905 | if (level < BTRFS_MAX_LEVEL - 1) | 905 | if (level < BTRFS_MAX_LEVEL - 1) { |
| 906 | parent = path->nodes[level + 1]; | 906 | parent = path->nodes[level + 1]; |
| 907 | pslot = path->slots[level + 1]; | 907 | pslot = path->slots[level + 1]; |
| 908 | } | ||
| 908 | 909 | ||
| 909 | /* | 910 | /* |
| 910 | * deal with the case where there is only one pointer in the root | 911 | * deal with the case where there is only one pointer in the root |
| @@ -1107,9 +1108,10 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
| 1107 | mid = path->nodes[level]; | 1108 | mid = path->nodes[level]; |
| 1108 | WARN_ON(btrfs_header_generation(mid) != trans->transid); | 1109 | WARN_ON(btrfs_header_generation(mid) != trans->transid); |
| 1109 | 1110 | ||
| 1110 | if (level < BTRFS_MAX_LEVEL - 1) | 1111 | if (level < BTRFS_MAX_LEVEL - 1) { |
| 1111 | parent = path->nodes[level + 1]; | 1112 | parent = path->nodes[level + 1]; |
| 1112 | pslot = path->slots[level + 1]; | 1113 | pslot = path->slots[level + 1]; |
| 1114 | } | ||
| 1113 | 1115 | ||
| 1114 | if (!parent) | 1116 | if (!parent) |
| 1115 | return 1; | 1117 | return 1; |
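Both hunks fix the same missing-braces bug. Pre-patch, the indentation promised a guarded block, but only the first statement was conditional:

        if (level < BTRFS_MAX_LEVEL - 1)
                parent = path->nodes[level + 1];
                pslot = path->slots[level + 1];  /* ran unconditionally */

With level == BTRFS_MAX_LEVEL - 1, the unguarded line reads path->slots[BTRFS_MAX_LEVEL], one element past the end of the array.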
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 03912c5c6f49..b9ba59ff9292 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
| @@ -30,6 +30,7 @@ | |||
| 30 | #include <linux/kobject.h> | 30 | #include <linux/kobject.h> |
| 31 | #include <trace/events/btrfs.h> | 31 | #include <trace/events/btrfs.h> |
| 32 | #include <asm/kmap_types.h> | 32 | #include <asm/kmap_types.h> |
| 33 | #include <linux/pagemap.h> | ||
| 33 | #include "extent_io.h" | 34 | #include "extent_io.h" |
| 34 | #include "extent_map.h" | 35 | #include "extent_map.h" |
| 35 | #include "async-thread.h" | 36 | #include "async-thread.h" |
| @@ -360,6 +361,47 @@ struct btrfs_header { | |||
| 360 | #define BTRFS_LABEL_SIZE 256 | 361 | #define BTRFS_LABEL_SIZE 256 |
| 361 | 362 | ||
| 362 | /* | 363 | /* |
| 364 | * just in case we somehow lose the roots and are not able to mount, | ||
| 365 | * we store an array of the roots from previous transactions | ||
| 366 | * in the super. | ||
| 367 | */ | ||
| 368 | #define BTRFS_NUM_BACKUP_ROOTS 4 | ||
| 369 | struct btrfs_root_backup { | ||
| 370 | __le64 tree_root; | ||
| 371 | __le64 tree_root_gen; | ||
| 372 | |||
| 373 | __le64 chunk_root; | ||
| 374 | __le64 chunk_root_gen; | ||
| 375 | |||
| 376 | __le64 extent_root; | ||
| 377 | __le64 extent_root_gen; | ||
| 378 | |||
| 379 | __le64 fs_root; | ||
| 380 | __le64 fs_root_gen; | ||
| 381 | |||
| 382 | __le64 dev_root; | ||
| 383 | __le64 dev_root_gen; | ||
| 384 | |||
| 385 | __le64 csum_root; | ||
| 386 | __le64 csum_root_gen; | ||
| 387 | |||
| 388 | __le64 total_bytes; | ||
| 389 | __le64 bytes_used; | ||
| 390 | __le64 num_devices; | ||
| 391 | /* future */ | ||
| 392 | __le64 unused_64[4]; | ||
| 393 | |||
| 394 | u8 tree_root_level; | ||
| 395 | u8 chunk_root_level; | ||
| 396 | u8 extent_root_level; | ||
| 397 | u8 fs_root_level; | ||
| 398 | u8 dev_root_level; | ||
| 399 | u8 csum_root_level; | ||
| 400 | /* future and to align */ | ||
| 401 | u8 unused_8[10]; | ||
| 402 | } __attribute__ ((__packed__)); | ||
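Adding up the packed layout: six root/generation pairs (96 bytes), three accounting fields (24 bytes), four reserved __le64s (32 bytes) and 16 bytes of levels plus padding make each slot 168 bytes, so the four-slot super_roots array added to the superblock below occupies 672 bytes.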
| 403 | |||
| 404 | /* | ||
| 363 | * the super block basically lists the main trees of the FS | 405 | * the super block basically lists the main trees of the FS |
| 364 | * it currently lacks any block count etc etc | 406 | * it currently lacks any block count etc etc |
| 365 | */ | 407 | */ |
| @@ -405,6 +447,7 @@ struct btrfs_super_block { | |||
| 405 | /* future expansion */ | 447 | /* future expansion */ |
| 406 | __le64 reserved[31]; | 448 | __le64 reserved[31]; |
| 407 | u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; | 449 | u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; |
| 450 | struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS]; | ||
| 408 | } __attribute__ ((__packed__)); | 451 | } __attribute__ ((__packed__)); |
| 409 | 452 | ||
| 410 | /* | 453 | /* |
| @@ -772,14 +815,8 @@ struct btrfs_space_info { | |||
| 772 | struct btrfs_block_rsv { | 815 | struct btrfs_block_rsv { |
| 773 | u64 size; | 816 | u64 size; |
| 774 | u64 reserved; | 817 | u64 reserved; |
| 775 | u64 freed[2]; | ||
| 776 | struct btrfs_space_info *space_info; | 818 | struct btrfs_space_info *space_info; |
| 777 | struct list_head list; | ||
| 778 | spinlock_t lock; | 819 | spinlock_t lock; |
| 779 | atomic_t usage; | ||
| 780 | unsigned int priority:8; | ||
| 781 | unsigned int durable:1; | ||
| 782 | unsigned int refill_used:1; | ||
| 783 | unsigned int full:1; | 820 | unsigned int full:1; |
| 784 | }; | 821 | }; |
| 785 | 822 | ||
| @@ -840,10 +877,10 @@ struct btrfs_block_group_cache { | |||
| 840 | spinlock_t lock; | 877 | spinlock_t lock; |
| 841 | u64 pinned; | 878 | u64 pinned; |
| 842 | u64 reserved; | 879 | u64 reserved; |
| 843 | u64 reserved_pinned; | ||
| 844 | u64 bytes_super; | 880 | u64 bytes_super; |
| 845 | u64 flags; | 881 | u64 flags; |
| 846 | u64 sectorsize; | 882 | u64 sectorsize; |
| 883 | u64 cache_generation; | ||
| 847 | unsigned int ro:1; | 884 | unsigned int ro:1; |
| 848 | unsigned int dirty:1; | 885 | unsigned int dirty:1; |
| 849 | unsigned int iref:1; | 886 | unsigned int iref:1; |
| @@ -899,6 +936,10 @@ struct btrfs_fs_info { | |||
| 899 | spinlock_t block_group_cache_lock; | 936 | spinlock_t block_group_cache_lock; |
| 900 | struct rb_root block_group_cache_tree; | 937 | struct rb_root block_group_cache_tree; |
| 901 | 938 | ||
| 939 | /* keep track of unallocated space */ | ||
| 940 | spinlock_t free_chunk_lock; | ||
| 941 | u64 free_chunk_space; | ||
| 942 | |||
| 902 | struct extent_io_tree freed_extents[2]; | 943 | struct extent_io_tree freed_extents[2]; |
| 903 | struct extent_io_tree *pinned_extents; | 944 | struct extent_io_tree *pinned_extents; |
| 904 | 945 | ||
| @@ -916,14 +957,11 @@ struct btrfs_fs_info { | |||
| 916 | struct btrfs_block_rsv trans_block_rsv; | 957 | struct btrfs_block_rsv trans_block_rsv; |
| 917 | /* block reservation for chunk tree */ | 958 | /* block reservation for chunk tree */ |
| 918 | struct btrfs_block_rsv chunk_block_rsv; | 959 | struct btrfs_block_rsv chunk_block_rsv; |
| 960 | /* block reservation for delayed operations */ | ||
| 961 | struct btrfs_block_rsv delayed_block_rsv; | ||
| 919 | 962 | ||
| 920 | struct btrfs_block_rsv empty_block_rsv; | 963 | struct btrfs_block_rsv empty_block_rsv; |
| 921 | 964 | ||
| 922 | /* list of block reservations that cross multiple transactions */ | ||
| 923 | struct list_head durable_block_rsv_list; | ||
| 924 | |||
| 925 | struct mutex durable_block_rsv_mutex; | ||
| 926 | |||
| 927 | u64 generation; | 965 | u64 generation; |
| 928 | u64 last_trans_committed; | 966 | u64 last_trans_committed; |
| 929 | 967 | ||
| @@ -942,8 +980,8 @@ struct btrfs_fs_info { | |||
| 942 | wait_queue_head_t transaction_blocked_wait; | 980 | wait_queue_head_t transaction_blocked_wait; |
| 943 | wait_queue_head_t async_submit_wait; | 981 | wait_queue_head_t async_submit_wait; |
| 944 | 982 | ||
| 945 | struct btrfs_super_block super_copy; | 983 | struct btrfs_super_block *super_copy; |
| 946 | struct btrfs_super_block super_for_commit; | 984 | struct btrfs_super_block *super_for_commit; |
| 947 | struct block_device *__bdev; | 985 | struct block_device *__bdev; |
| 948 | struct super_block *sb; | 986 | struct super_block *sb; |
| 949 | struct inode *btree_inode; | 987 | struct inode *btree_inode; |
| @@ -1036,6 +1074,7 @@ struct btrfs_fs_info { | |||
| 1036 | struct btrfs_workers endio_freespace_worker; | 1074 | struct btrfs_workers endio_freespace_worker; |
| 1037 | struct btrfs_workers submit_workers; | 1075 | struct btrfs_workers submit_workers; |
| 1038 | struct btrfs_workers caching_workers; | 1076 | struct btrfs_workers caching_workers; |
| 1077 | struct btrfs_workers readahead_workers; | ||
| 1039 | 1078 | ||
| 1040 | /* | 1079 | /* |
| 1041 | * fixup workers take dirty pages that didn't properly go through | 1080 | * fixup workers take dirty pages that didn't properly go through |
| @@ -1119,6 +1158,13 @@ struct btrfs_fs_info { | |||
| 1119 | u64 fs_state; | 1158 | u64 fs_state; |
| 1120 | 1159 | ||
| 1121 | struct btrfs_delayed_root *delayed_root; | 1160 | struct btrfs_delayed_root *delayed_root; |
| 1161 | |||
| 1162 | /* readahead tree */ | ||
| 1163 | spinlock_t reada_lock; | ||
| 1164 | struct radix_tree_root reada_tree; | ||
| 1165 | |||
| 1166 | /* next backup root to be overwritten */ | ||
| 1167 | int backup_root_index; | ||
| 1122 | }; | 1168 | }; |
| 1123 | 1169 | ||
| 1124 | /* | 1170 | /* |
| @@ -1363,6 +1409,7 @@ struct btrfs_ioctl_defrag_range_args { | |||
| 1363 | #define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15) | 1409 | #define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15) |
| 1364 | #define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) | 1410 | #define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) |
| 1365 | #define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17) | 1411 | #define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17) |
| 1412 | #define BTRFS_MOUNT_RECOVERY (1 << 18) | ||
| 1366 | 1413 | ||
| 1367 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) | 1414 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) |
| 1368 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) | 1415 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) |
| @@ -1978,6 +2025,55 @@ static inline bool btrfs_root_readonly(struct btrfs_root *root) | |||
| 1978 | return root->root_item.flags & BTRFS_ROOT_SUBVOL_RDONLY; | 2025 | return root->root_item.flags & BTRFS_ROOT_SUBVOL_RDONLY; |
| 1979 | } | 2026 | } |
| 1980 | 2027 | ||
| 2028 | /* struct btrfs_root_backup */ | ||
| 2029 | BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup, | ||
| 2030 | tree_root, 64); | ||
| 2031 | BTRFS_SETGET_STACK_FUNCS(backup_tree_root_gen, struct btrfs_root_backup, | ||
| 2032 | tree_root_gen, 64); | ||
| 2033 | BTRFS_SETGET_STACK_FUNCS(backup_tree_root_level, struct btrfs_root_backup, | ||
| 2034 | tree_root_level, 8); | ||
| 2035 | |||
| 2036 | BTRFS_SETGET_STACK_FUNCS(backup_chunk_root, struct btrfs_root_backup, | ||
| 2037 | chunk_root, 64); | ||
| 2038 | BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_gen, struct btrfs_root_backup, | ||
| 2039 | chunk_root_gen, 64); | ||
| 2040 | BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_level, struct btrfs_root_backup, | ||
| 2041 | chunk_root_level, 8); | ||
| 2042 | |||
| 2043 | BTRFS_SETGET_STACK_FUNCS(backup_extent_root, struct btrfs_root_backup, | ||
| 2044 | extent_root, 64); | ||
| 2045 | BTRFS_SETGET_STACK_FUNCS(backup_extent_root_gen, struct btrfs_root_backup, | ||
| 2046 | extent_root_gen, 64); | ||
| 2047 | BTRFS_SETGET_STACK_FUNCS(backup_extent_root_level, struct btrfs_root_backup, | ||
| 2048 | extent_root_level, 8); | ||
| 2049 | |||
| 2050 | BTRFS_SETGET_STACK_FUNCS(backup_fs_root, struct btrfs_root_backup, | ||
| 2051 | fs_root, 64); | ||
| 2052 | BTRFS_SETGET_STACK_FUNCS(backup_fs_root_gen, struct btrfs_root_backup, | ||
| 2053 | fs_root_gen, 64); | ||
| 2054 | BTRFS_SETGET_STACK_FUNCS(backup_fs_root_level, struct btrfs_root_backup, | ||
| 2055 | fs_root_level, 8); | ||
| 2056 | |||
| 2057 | BTRFS_SETGET_STACK_FUNCS(backup_dev_root, struct btrfs_root_backup, | ||
| 2058 | dev_root, 64); | ||
| 2059 | BTRFS_SETGET_STACK_FUNCS(backup_dev_root_gen, struct btrfs_root_backup, | ||
| 2060 | dev_root_gen, 64); | ||
| 2061 | BTRFS_SETGET_STACK_FUNCS(backup_dev_root_level, struct btrfs_root_backup, | ||
| 2062 | dev_root_level, 8); | ||
| 2063 | |||
| 2064 | BTRFS_SETGET_STACK_FUNCS(backup_csum_root, struct btrfs_root_backup, | ||
| 2065 | csum_root, 64); | ||
| 2066 | BTRFS_SETGET_STACK_FUNCS(backup_csum_root_gen, struct btrfs_root_backup, | ||
| 2067 | csum_root_gen, 64); | ||
| 2068 | BTRFS_SETGET_STACK_FUNCS(backup_csum_root_level, struct btrfs_root_backup, | ||
| 2069 | csum_root_level, 8); | ||
| 2070 | BTRFS_SETGET_STACK_FUNCS(backup_total_bytes, struct btrfs_root_backup, | ||
| 2071 | total_bytes, 64); | ||
| 2072 | BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup, | ||
| 2073 | bytes_used, 64); | ||
| 2074 | BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup, | ||
| 2075 | num_devices, 64); | ||
| 2076 | |||
| 1981 | /* struct btrfs_super_block */ | 2077 | /* struct btrfs_super_block */ |
| 1982 | 2078 | ||
| 1983 | BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); | 2079 | BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); |
| @@ -2129,6 +2225,11 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info) | |||
| 2129 | (space_info->flags & BTRFS_BLOCK_GROUP_DATA)); | 2225 | (space_info->flags & BTRFS_BLOCK_GROUP_DATA)); |
| 2130 | } | 2226 | } |
| 2131 | 2227 | ||
| 2228 | static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping) | ||
| 2229 | { | ||
| 2230 | return mapping_gfp_mask(mapping) & ~__GFP_FS; | ||
| 2231 | } | ||
| 2232 | |||
| 2132 | /* extent-tree.c */ | 2233 | /* extent-tree.c */ |
| 2133 | static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, | 2234 | static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, |
| 2134 | unsigned num_items) | 2235 | unsigned num_items) |
| @@ -2137,6 +2238,17 @@ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, | |||
| 2137 | 3 * num_items; | 2238 | 3 * num_items; |
| 2138 | } | 2239 | } |
| 2139 | 2240 | ||
| 2241 | /* | ||
| 2242 | * Doing a truncate won't result in new nodes or leaves, just what we need for | ||
| 2243 | * COW. | ||
| 2244 | */ | ||
| 2245 | static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_root *root, | ||
| 2246 | unsigned num_items) | ||
| 2247 | { | ||
| 2248 | return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * | ||
| 2249 | num_items; | ||
| 2250 | } | ||
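Worked through with the common 4 KiB leaf/node size and BTRFS_MAX_LEVEL == 8 (an assumption about the configuration, not part of the patch): one truncate item reserves 4096 + 7 * 4096 = 32 KiB, one third of what btrfs_calc_trans_metadata_size() above charges, since its factor of 3 covers new nodes and leaves that a truncate cannot create.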
| 2251 | |||
| 2140 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache); | 2252 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache); |
| 2141 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | 2253 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, |
| 2142 | struct btrfs_root *root, unsigned long count); | 2254 | struct btrfs_root *root, unsigned long count); |
| @@ -2146,6 +2258,9 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | |||
| 2146 | u64 num_bytes, u64 *refs, u64 *flags); | 2258 | u64 num_bytes, u64 *refs, u64 *flags); |
| 2147 | int btrfs_pin_extent(struct btrfs_root *root, | 2259 | int btrfs_pin_extent(struct btrfs_root *root, |
| 2148 | u64 bytenr, u64 num, int reserved); | 2260 | u64 bytenr, u64 num, int reserved); |
| 2261 | int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, | ||
| 2262 | struct btrfs_root *root, | ||
| 2263 | u64 bytenr, u64 num_bytes); | ||
| 2149 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, | 2264 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, |
| 2150 | struct btrfs_root *root, | 2265 | struct btrfs_root *root, |
| 2151 | u64 objectid, u64 offset, u64 bytenr); | 2266 | u64 objectid, u64 offset, u64 bytenr); |
| @@ -2196,8 +2311,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 2196 | u64 root_objectid, u64 owner, u64 offset); | 2311 | u64 root_objectid, u64 owner, u64 offset); |
| 2197 | 2312 | ||
| 2198 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); | 2313 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); |
| 2199 | int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, | 2314 | int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, |
| 2200 | u64 num_bytes, int reserve, int sinfo); | 2315 | u64 start, u64 len); |
| 2201 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | 2316 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, |
| 2202 | struct btrfs_root *root); | 2317 | struct btrfs_root *root); |
| 2203 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | 2318 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, |
| @@ -2240,25 +2355,23 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv); | |||
| 2240 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root); | 2355 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root); |
| 2241 | void btrfs_free_block_rsv(struct btrfs_root *root, | 2356 | void btrfs_free_block_rsv(struct btrfs_root *root, |
| 2242 | struct btrfs_block_rsv *rsv); | 2357 | struct btrfs_block_rsv *rsv); |
| 2243 | void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, | 2358 | int btrfs_block_rsv_add(struct btrfs_root *root, |
| 2244 | struct btrfs_block_rsv *rsv); | ||
| 2245 | int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, | ||
| 2246 | struct btrfs_root *root, | ||
| 2247 | struct btrfs_block_rsv *block_rsv, | 2359 | struct btrfs_block_rsv *block_rsv, |
| 2248 | u64 num_bytes); | 2360 | u64 num_bytes); |
| 2249 | int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | 2361 | int btrfs_block_rsv_add_noflush(struct btrfs_root *root, |
| 2250 | struct btrfs_root *root, | 2362 | struct btrfs_block_rsv *block_rsv, |
| 2363 | u64 num_bytes); | ||
| 2364 | int btrfs_block_rsv_check(struct btrfs_root *root, | ||
| 2365 | struct btrfs_block_rsv *block_rsv, int min_factor); | ||
| 2366 | int btrfs_block_rsv_refill(struct btrfs_root *root, | ||
| 2251 | struct btrfs_block_rsv *block_rsv, | 2367 | struct btrfs_block_rsv *block_rsv, |
| 2252 | u64 min_reserved, int min_factor); | 2368 | u64 min_reserved); |
| 2253 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | 2369 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, |
| 2254 | struct btrfs_block_rsv *dst_rsv, | 2370 | struct btrfs_block_rsv *dst_rsv, |
| 2255 | u64 num_bytes); | 2371 | u64 num_bytes); |
| 2256 | void btrfs_block_rsv_release(struct btrfs_root *root, | 2372 | void btrfs_block_rsv_release(struct btrfs_root *root, |
| 2257 | struct btrfs_block_rsv *block_rsv, | 2373 | struct btrfs_block_rsv *block_rsv, |
| 2258 | u64 num_bytes); | 2374 | u64 num_bytes); |
| 2259 | int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, | ||
| 2260 | struct btrfs_root *root, | ||
| 2261 | struct btrfs_block_rsv *rsv); | ||
| 2262 | int btrfs_set_block_group_ro(struct btrfs_root *root, | 2375 | int btrfs_set_block_group_ro(struct btrfs_root *root, |
| 2263 | struct btrfs_block_group_cache *cache); | 2376 | struct btrfs_block_group_cache *cache); |
| 2264 | int btrfs_set_block_group_rw(struct btrfs_root *root, | 2377 | int btrfs_set_block_group_rw(struct btrfs_root *root, |
| @@ -2379,6 +2492,18 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info) | |||
| 2379 | smp_mb(); | 2492 | smp_mb(); |
| 2380 | return fs_info->closing; | 2493 | return fs_info->closing; |
| 2381 | } | 2494 | } |
| 2495 | static inline void free_fs_info(struct btrfs_fs_info *fs_info) | ||
| 2496 | { | ||
| 2497 | kfree(fs_info->delayed_root); | ||
| 2498 | kfree(fs_info->extent_root); | ||
| 2499 | kfree(fs_info->tree_root); | ||
| 2500 | kfree(fs_info->chunk_root); | ||
| 2501 | kfree(fs_info->dev_root); | ||
| 2502 | kfree(fs_info->csum_root); | ||
| 2503 | kfree(fs_info->super_copy); | ||
| 2504 | kfree(fs_info->super_for_commit); | ||
| 2505 | kfree(fs_info); | ||
| 2506 | } | ||
| 2382 | 2507 | ||
| 2383 | /* root-item.c */ | 2508 | /* root-item.c */ |
| 2384 | int btrfs_find_root_ref(struct btrfs_root *tree_root, | 2509 | int btrfs_find_root_ref(struct btrfs_root *tree_root, |
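The new free_fs_info() above relies on kfree(NULL) being a no-op, so it is safe to call on a partially constructed fs_info; that is what lets the open_ctree() error path and close_ctree() later in this diff funnel through the single helper. A userspace sketch of the same pattern, using free(), which gives the identical NULL guarantee:

#include <stdlib.h>

struct fs_stub {
        void *delayed_root;
        void *tree_root;
};

/* free(NULL) is a no-op, just like kfree(NULL), so one teardown
 * helper covers fully and partially initialized objects alike. */
void free_stub(struct fs_stub *s)
{
        if (!s)
                return;
        free(s->delayed_root);  /* may still be NULL after an early failure */
        free(s->tree_root);
        free(s);
}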
| @@ -2579,11 +2704,6 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, | |||
| 2579 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); | 2704 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); |
| 2580 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); | 2705 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); |
| 2581 | int btrfs_orphan_cleanup(struct btrfs_root *root); | 2706 | int btrfs_orphan_cleanup(struct btrfs_root *root); |
| 2582 | void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, | ||
| 2583 | struct btrfs_pending_snapshot *pending, | ||
| 2584 | u64 *bytes_to_reserve); | ||
| 2585 | void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, | ||
| 2586 | struct btrfs_pending_snapshot *pending); | ||
| 2587 | void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, | 2707 | void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, |
| 2588 | struct btrfs_root *root); | 2708 | struct btrfs_root *root); |
| 2589 | int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size); | 2709 | int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size); |
| @@ -2697,4 +2817,20 @@ int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid); | |||
| 2697 | int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, | 2817 | int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, |
| 2698 | struct btrfs_scrub_progress *progress); | 2818 | struct btrfs_scrub_progress *progress); |
| 2699 | 2819 | ||
| 2820 | /* reada.c */ | ||
| 2821 | struct reada_control { | ||
| 2822 | struct btrfs_root *root; /* tree to prefetch */ | ||
| 2823 | struct btrfs_key key_start; | ||
| 2824 | struct btrfs_key key_end; /* exclusive */ | ||
| 2825 | atomic_t elems; | ||
| 2826 | struct kref refcnt; | ||
| 2827 | wait_queue_head_t wait; | ||
| 2828 | }; | ||
| 2829 | struct reada_control *btrfs_reada_add(struct btrfs_root *root, | ||
| 2830 | struct btrfs_key *start, struct btrfs_key *end); | ||
| 2831 | int btrfs_reada_wait(void *handle); | ||
| 2832 | void btrfs_reada_detach(void *handle); | ||
| 2833 | int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, | ||
| 2834 | u64 start, int err); | ||
| 2835 | |||
| 2700 | #endif | 2836 | #endif |
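The reada_control handle declared above is reference-counted via kref, and completion is signalled through elems/wait. A minimal usage sketch of the interface, with error handling trimmed and root assumed to point at the tree to prefetch (btrfs_reada_add() returns an ERR_PTR on failure):

struct btrfs_key key_start = { .objectid = 0, .type = 0, .offset = 0 };
struct btrfs_key key_end = { .objectid = (u64)-1, .type = (u8)-1,
                             .offset = (u64)-1 };
struct reada_control *rc;

rc = btrfs_reada_add(root, &key_start, &key_end);  /* queue the prefetch */
if (!IS_ERR(rc))
        btrfs_reada_wait(rc);  /* block until the whole key range is read */

Alternatively, btrfs_reada_detach() drops the caller's reference and lets the readahead finish in the background.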
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index ae4d9cd10961..3a1b939c9ae2 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
| @@ -591,7 +591,7 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans, | |||
| 591 | return 0; | 591 | return 0; |
| 592 | 592 | ||
| 593 | src_rsv = trans->block_rsv; | 593 | src_rsv = trans->block_rsv; |
| 594 | dst_rsv = &root->fs_info->global_block_rsv; | 594 | dst_rsv = &root->fs_info->delayed_block_rsv; |
| 595 | 595 | ||
| 596 | num_bytes = btrfs_calc_trans_metadata_size(root, 1); | 596 | num_bytes = btrfs_calc_trans_metadata_size(root, 1); |
| 597 | ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); | 597 | ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); |
| @@ -609,7 +609,7 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_root *root, | |||
| 609 | if (!item->bytes_reserved) | 609 | if (!item->bytes_reserved) |
| 610 | return; | 610 | return; |
| 611 | 611 | ||
| 612 | rsv = &root->fs_info->global_block_rsv; | 612 | rsv = &root->fs_info->delayed_block_rsv; |
| 613 | btrfs_block_rsv_release(root, rsv, | 613 | btrfs_block_rsv_release(root, rsv, |
| 614 | item->bytes_reserved); | 614 | item->bytes_reserved); |
| 615 | } | 615 | } |
| @@ -624,13 +624,36 @@ static int btrfs_delayed_inode_reserve_metadata( | |||
| 624 | u64 num_bytes; | 624 | u64 num_bytes; |
| 625 | int ret; | 625 | int ret; |
| 626 | 626 | ||
| 627 | if (!trans->bytes_reserved) | ||
| 628 | return 0; | ||
| 629 | |||
| 630 | src_rsv = trans->block_rsv; | 627 | src_rsv = trans->block_rsv; |
| 631 | dst_rsv = &root->fs_info->global_block_rsv; | 628 | dst_rsv = &root->fs_info->delayed_block_rsv; |
| 632 | 629 | ||
| 633 | num_bytes = btrfs_calc_trans_metadata_size(root, 1); | 630 | num_bytes = btrfs_calc_trans_metadata_size(root, 1); |
| 631 | |||
| 632 | /* | ||
| 633 | * btrfs_dirty_inode will update the inode under btrfs_join_transaction | ||
| 634 | * which, for speed, doesn't reserve space. This is a problem since we |||
| 635 | * still need to reserve space for this update, so try to reserve the |||
| 636 | * space here. |||
| 637 | * |||
| 638 | * If src_rsv == delalloc_block_rsv we'll just let it steal from that |||
| 639 | * reserve, since the space is already accounted for. |||
| 640 | */ | ||
| 641 | if (!trans->bytes_reserved && | ||
| 642 | src_rsv != &root->fs_info->delalloc_block_rsv) { | ||
| 643 | ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes); | ||
| 644 | /* | ||
| 645 | * Since we're under a transaction reserve_metadata_bytes could | ||
| 646 | * try to commit the transaction which will make it return | ||
| 647 | * EAGAIN to make us stop the transaction we have, so return | ||
| 648 | * ENOSPC instead so that btrfs_dirty_inode knows what to do. | ||
| 649 | */ | ||
| 650 | if (ret == -EAGAIN) | ||
| 651 | ret = -ENOSPC; | ||
| 652 | if (!ret) | ||
| 653 | node->bytes_reserved = num_bytes; | ||
| 654 | return ret; | ||
| 655 | } | ||
| 656 | |||
| 634 | ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); | 657 | ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); |
| 635 | if (!ret) | 658 | if (!ret) |
| 636 | node->bytes_reserved = num_bytes; | 659 | node->bytes_reserved = num_bytes; |
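The new branch above covers transactions that were joined rather than started: they carry no reserved bytes, so the code reserves directly against the delayed rsv without flushing, and maps -EAGAIN (a request to commit, which is impossible while already inside a transaction) to -ENOSPC so btrfs_dirty_inode() knows to fall back. A compact userspace sketch of that decision; reserve_noflush() and migrate_from_src() are hypothetical stand-ins for btrfs_block_rsv_add_noflush() and btrfs_block_rsv_migrate():

#include <errno.h>

int reserve_noflush(void);    /* stand-in, returns 0 on success */
int migrate_from_src(void);   /* stand-in, returns 0 on success */

int reserve_for_delayed_update(int bytes_reserved, int src_is_delalloc)
{
        if (!bytes_reserved && !src_is_delalloc) {
                int ret = reserve_noflush();
                if (ret == -EAGAIN)     /* can't commit under a transaction */
                        ret = -ENOSPC;  /* caller falls back to a full update */
                return ret;
        }
        /* a started transaction (or a delalloc source) already holds
         * the space, so just move the reservation between rsvs */
        return migrate_from_src();
}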
| @@ -646,7 +669,7 @@ static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root, | |||
| 646 | if (!node->bytes_reserved) | 669 | if (!node->bytes_reserved) |
| 647 | return; | 670 | return; |
| 648 | 671 | ||
| 649 | rsv = &root->fs_info->global_block_rsv; | 672 | rsv = &root->fs_info->delayed_block_rsv; |
| 650 | btrfs_block_rsv_release(root, rsv, | 673 | btrfs_block_rsv_release(root, rsv, |
| 651 | node->bytes_reserved); | 674 | node->bytes_reserved); |
| 652 | node->bytes_reserved = 0; | 675 | node->bytes_reserved = 0; |
| @@ -1026,7 +1049,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, | |||
| 1026 | path->leave_spinning = 1; | 1049 | path->leave_spinning = 1; |
| 1027 | 1050 | ||
| 1028 | block_rsv = trans->block_rsv; | 1051 | block_rsv = trans->block_rsv; |
| 1029 | trans->block_rsv = &root->fs_info->global_block_rsv; | 1052 | trans->block_rsv = &root->fs_info->delayed_block_rsv; |
| 1030 | 1053 | ||
| 1031 | delayed_root = btrfs_get_delayed_root(root); | 1054 | delayed_root = btrfs_get_delayed_root(root); |
| 1032 | 1055 | ||
| @@ -1069,7 +1092,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, | |||
| 1069 | path->leave_spinning = 1; | 1092 | path->leave_spinning = 1; |
| 1070 | 1093 | ||
| 1071 | block_rsv = trans->block_rsv; | 1094 | block_rsv = trans->block_rsv; |
| 1072 | trans->block_rsv = &node->root->fs_info->global_block_rsv; | 1095 | trans->block_rsv = &node->root->fs_info->delayed_block_rsv; |
| 1073 | 1096 | ||
| 1074 | ret = btrfs_insert_delayed_items(trans, path, node->root, node); | 1097 | ret = btrfs_insert_delayed_items(trans, path, node->root, node); |
| 1075 | if (!ret) | 1098 | if (!ret) |
| @@ -1149,7 +1172,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work) | |||
| 1149 | goto free_path; | 1172 | goto free_path; |
| 1150 | 1173 | ||
| 1151 | block_rsv = trans->block_rsv; | 1174 | block_rsv = trans->block_rsv; |
| 1152 | trans->block_rsv = &root->fs_info->global_block_rsv; | 1175 | trans->block_rsv = &root->fs_info->delayed_block_rsv; |
| 1153 | 1176 | ||
| 1154 | ret = btrfs_insert_delayed_items(trans, path, root, delayed_node); | 1177 | ret = btrfs_insert_delayed_items(trans, path, root, delayed_node); |
| 1155 | if (!ret) | 1178 | if (!ret) |
| @@ -1686,11 +1709,8 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, | |||
| 1686 | } | 1709 | } |
| 1687 | 1710 | ||
| 1688 | ret = btrfs_delayed_inode_reserve_metadata(trans, root, delayed_node); | 1711 | ret = btrfs_delayed_inode_reserve_metadata(trans, root, delayed_node); |
| 1689 | /* | 1712 | if (ret) |
| 1690 | * we must reserve enough space when we start a new transaction, | 1713 | goto release_node; |
| 1691 | * so reserving metadata failure is impossible | ||
| 1692 | */ | ||
| 1693 | BUG_ON(ret); | ||
| 1694 | 1714 | ||
| 1695 | fill_stack_inode_item(trans, &delayed_node->inode_item, inode); | 1715 | fill_stack_inode_item(trans, &delayed_node->inode_item, inode); |
| 1696 | delayed_node->inode_dirty = 1; | 1716 | delayed_node->inode_dirty = 1; |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 07ea91879a91..102c176fc29c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -256,8 +256,7 @@ void btrfs_csum_final(u32 crc, char *result) | |||
| 256 | static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | 256 | static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, |
| 257 | int verify) | 257 | int verify) |
| 258 | { | 258 | { |
| 259 | u16 csum_size = | 259 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); |
| 260 | btrfs_super_csum_size(&root->fs_info->super_copy); | ||
| 261 | char *result = NULL; | 260 | char *result = NULL; |
| 262 | unsigned long len; | 261 | unsigned long len; |
| 263 | unsigned long cur_len; | 262 | unsigned long cur_len; |
| @@ -367,7 +366,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
| 367 | clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); | 366 | clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); |
| 368 | io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; | 367 | io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; |
| 369 | while (1) { | 368 | while (1) { |
| 370 | ret = read_extent_buffer_pages(io_tree, eb, start, 1, | 369 | ret = read_extent_buffer_pages(io_tree, eb, start, |
| 370 | WAIT_COMPLETE, | ||
| 371 | btree_get_extent, mirror_num); | 371 | btree_get_extent, mirror_num); |
| 372 | if (!ret && | 372 | if (!ret && |
| 373 | !verify_parent_transid(io_tree, eb, parent_transid)) | 373 | !verify_parent_transid(io_tree, eb, parent_transid)) |
| @@ -608,11 +608,48 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
| 608 | end = min_t(u64, eb->len, PAGE_CACHE_SIZE); | 608 | end = min_t(u64, eb->len, PAGE_CACHE_SIZE); |
| 609 | end = eb->start + end - 1; | 609 | end = eb->start + end - 1; |
| 610 | err: | 610 | err: |
| 611 | if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) { | ||
| 612 | clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags); | ||
| 613 | btree_readahead_hook(root, eb, eb->start, ret); | ||
| 614 | } | ||
| 615 | |||
| 611 | free_extent_buffer(eb); | 616 | free_extent_buffer(eb); |
| 612 | out: | 617 | out: |
| 613 | return ret; | 618 | return ret; |
| 614 | } | 619 | } |
| 615 | 620 | ||
| 621 | static int btree_io_failed_hook(struct bio *failed_bio, | ||
| 622 | struct page *page, u64 start, u64 end, | ||
| 623 | u64 mirror_num, struct extent_state *state) | ||
| 624 | { | ||
| 625 | struct extent_io_tree *tree; | ||
| 626 | unsigned long len; | ||
| 627 | struct extent_buffer *eb; | ||
| 628 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | ||
| 629 | |||
| 630 | tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
| 631 | if (page->private == EXTENT_PAGE_PRIVATE) | ||
| 632 | goto out; | ||
| 633 | if (!page->private) | ||
| 634 | goto out; | ||
| 635 | |||
| 636 | len = page->private >> 2; | ||
| 637 | WARN_ON(len == 0); | ||
| 638 | |||
| 639 | eb = alloc_extent_buffer(tree, start, len, page); | ||
| 640 | if (eb == NULL) | ||
| 641 | goto out; | ||
| 642 | |||
| 643 | if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) { | ||
| 644 | clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags); | ||
| 645 | btree_readahead_hook(root, eb, eb->start, -EIO); | ||
| 646 | } | ||
| 647 | free_extent_buffer(eb); | ||
| 648 | |||
| 649 | out: | ||
| 650 | return -EIO; /* we fixed nothing */ | ||
| 651 | } | ||
| 652 | |||
| 616 | static void end_workqueue_bio(struct bio *bio, int err) | 653 | static void end_workqueue_bio(struct bio *bio, int err) |
| 617 | { | 654 | { |
| 618 | struct end_io_wq *end_io_wq = bio->bi_private; | 655 | struct end_io_wq *end_io_wq = bio->bi_private; |
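Both completion paths above, success in btree_readpage_end_io_hook() and failure in the new btree_io_failed_hook(), use the same notify-once idiom: EXTENT_BUFFER_READAHEAD is set when the speculative read is issued and cleared by whichever path finishes first, so btree_readahead_hook() fires once per buffer. A standalone sketch of the idiom; it collapses the kernel's test_bit()/clear_bit() pair on eb->bflags into a single atomic exchange:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool readahead_armed = true;  /* set when the read is issued */

static void readahead_hook(int err)
{
        printf("readahead done, err=%d\n", err);
}

/* called from both the success and the failure completion path;
 * the exchange guarantees the hook runs at most once */
static void complete_once(int err)
{
        if (atomic_exchange(&readahead_armed, false))
                readahead_hook(err);
}

int main(void)
{
        complete_once(0);    /* fires the hook */
        complete_once(-5);   /* no-op: the flag was already consumed */
        return 0;
}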
| @@ -908,7 +945,7 @@ static int btree_readpage(struct file *file, struct page *page) | |||
| 908 | { | 945 | { |
| 909 | struct extent_io_tree *tree; | 946 | struct extent_io_tree *tree; |
| 910 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 947 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
| 911 | return extent_read_full_page(tree, page, btree_get_extent); | 948 | return extent_read_full_page(tree, page, btree_get_extent, 0); |
| 912 | } | 949 | } |
| 913 | 950 | ||
| 914 | static int btree_releasepage(struct page *page, gfp_t gfp_flags) | 951 | static int btree_releasepage(struct page *page, gfp_t gfp_flags) |
| @@ -974,11 +1011,43 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, | |||
| 974 | if (!buf) | 1011 | if (!buf) |
| 975 | return 0; | 1012 | return 0; |
| 976 | read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, | 1013 | read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, |
| 977 | buf, 0, 0, btree_get_extent, 0); | 1014 | buf, 0, WAIT_NONE, btree_get_extent, 0); |
| 978 | free_extent_buffer(buf); | 1015 | free_extent_buffer(buf); |
| 979 | return ret; | 1016 | return ret; |
| 980 | } | 1017 | } |
| 981 | 1018 | ||
| 1019 | int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, | ||
| 1020 | int mirror_num, struct extent_buffer **eb) | ||
| 1021 | { | ||
| 1022 | struct extent_buffer *buf = NULL; | ||
| 1023 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
| 1024 | struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree; | ||
| 1025 | int ret; | ||
| 1026 | |||
| 1027 | buf = btrfs_find_create_tree_block(root, bytenr, blocksize); | ||
| 1028 | if (!buf) | ||
| 1029 | return 0; | ||
| 1030 | |||
| 1031 | set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags); | ||
| 1032 | |||
| 1033 | ret = read_extent_buffer_pages(io_tree, buf, 0, WAIT_PAGE_LOCK, | ||
| 1034 | btree_get_extent, mirror_num); | ||
| 1035 | if (ret) { | ||
| 1036 | free_extent_buffer(buf); | ||
| 1037 | return ret; | ||
| 1038 | } | ||
| 1039 | |||
| 1040 | if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) { | ||
| 1041 | free_extent_buffer(buf); | ||
| 1042 | return -EIO; | ||
| 1043 | } else if (extent_buffer_uptodate(io_tree, buf, NULL)) { | ||
| 1044 | *eb = buf; | ||
| 1045 | } else { | ||
| 1046 | free_extent_buffer(buf); | ||
| 1047 | } | ||
| 1048 | return 0; | ||
| 1049 | } | ||
| 1050 | |||
| 982 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | 1051 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, |
| 983 | u64 bytenr, u32 blocksize) | 1052 | u64 bytenr, u32 blocksize) |
| 984 | { | 1053 | { |
| @@ -1135,10 +1204,12 @@ static int find_and_setup_root(struct btrfs_root *tree_root, | |||
| 1135 | 1204 | ||
| 1136 | generation = btrfs_root_generation(&root->root_item); | 1205 | generation = btrfs_root_generation(&root->root_item); |
| 1137 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | 1206 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); |
| 1207 | root->commit_root = NULL; | ||
| 1138 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | 1208 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), |
| 1139 | blocksize, generation); | 1209 | blocksize, generation); |
| 1140 | if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) { | 1210 | if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) { |
| 1141 | free_extent_buffer(root->node); | 1211 | free_extent_buffer(root->node); |
| 1212 | root->node = NULL; | ||
| 1142 | return -EIO; | 1213 | return -EIO; |
| 1143 | } | 1214 | } |
| 1144 | root->commit_root = btrfs_root_node(root); | 1215 | root->commit_root = btrfs_root_node(root); |
| @@ -1577,6 +1648,235 @@ sleep: | |||
| 1577 | return 0; | 1648 | return 0; |
| 1578 | } | 1649 | } |
| 1579 | 1650 | ||
| 1651 | /* | ||
| 1652 | * this will find the highest generation in the array of | ||
| 1653 | * root backups. The index of the newest entry is returned, |||
| 1654 | * or -1 if we can't find anything. | ||
| 1655 | * | ||
| 1656 | * We check to make sure the array is valid by comparing the | ||
| 1657 | * generation of the latest root in the array with the generation | ||
| 1658 | * in the super block. If they don't match we pitch it. | ||
| 1659 | */ | ||
| 1660 | static int find_newest_super_backup(struct btrfs_fs_info *info, u64 newest_gen) | ||
| 1661 | { | ||
| 1662 | u64 cur; | ||
| 1663 | int newest_index = -1; | ||
| 1664 | struct btrfs_root_backup *root_backup; | ||
| 1665 | int i; | ||
| 1666 | |||
| 1667 | for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) { | ||
| 1668 | root_backup = info->super_copy->super_roots + i; | ||
| 1669 | cur = btrfs_backup_tree_root_gen(root_backup); | ||
| 1670 | if (cur == newest_gen) | ||
| 1671 | newest_index = i; | ||
| 1672 | } | ||
| 1673 | |||
| 1674 | /* check to see if we actually wrapped around */ | ||
| 1675 | if (newest_index == BTRFS_NUM_BACKUP_ROOTS - 1) { | ||
| 1676 | root_backup = info->super_copy->super_roots; | ||
| 1677 | cur = btrfs_backup_tree_root_gen(root_backup); | ||
| 1678 | if (cur == newest_gen) | ||
| 1679 | newest_index = 0; | ||
| 1680 | } | ||
| 1681 | return newest_index; | ||
| 1682 | } | ||
| 1683 | |||
| 1684 | |||
| 1685 | /* | ||
| 1686 | * find the oldest backup so we know where to store new entries | ||
| 1687 | * in the backup array. This will set the backup_root_index | ||
| 1688 | * field in the fs_info struct | ||
| 1689 | */ | ||
| 1690 | static void find_oldest_super_backup(struct btrfs_fs_info *info, | ||
| 1691 | u64 newest_gen) | ||
| 1692 | { | ||
| 1693 | int newest_index = -1; | ||
| 1694 | |||
| 1695 | newest_index = find_newest_super_backup(info, newest_gen); | ||
| 1696 | /* if there was garbage in there, just move along */ | ||
| 1697 | if (newest_index == -1) { | ||
| 1698 | info->backup_root_index = 0; | ||
| 1699 | } else { | ||
| 1700 | info->backup_root_index = (newest_index + 1) % BTRFS_NUM_BACKUP_ROOTS; | ||
| 1701 | } | ||
| 1702 | } | ||
| 1703 | |||
| 1704 | /* | ||
| 1705 | * copy all the root pointers into the super backup array. | ||
| 1706 | * this will bump the backup pointer by one when it is | ||
| 1707 | * done | ||
| 1708 | */ | ||
| 1709 | static void backup_super_roots(struct btrfs_fs_info *info) | ||
| 1710 | { | ||
| 1711 | int next_backup; | ||
| 1712 | struct btrfs_root_backup *root_backup; | ||
| 1713 | int last_backup; | ||
| 1714 | |||
| 1715 | next_backup = info->backup_root_index; | ||
| 1716 | last_backup = (next_backup + BTRFS_NUM_BACKUP_ROOTS - 1) % | ||
| 1717 | BTRFS_NUM_BACKUP_ROOTS; | ||
| 1718 | |||
| 1719 | /* | ||
| 1720 | * just overwrite the last backup if we're at the same generation; |||
| 1721 | * this happens only at umount |||
| 1722 | */ | ||
| 1723 | root_backup = info->super_for_commit->super_roots + last_backup; | ||
| 1724 | if (btrfs_backup_tree_root_gen(root_backup) == | ||
| 1725 | btrfs_header_generation(info->tree_root->node)) | ||
| 1726 | next_backup = last_backup; | ||
| 1727 | |||
| 1728 | root_backup = info->super_for_commit->super_roots + next_backup; | ||
| 1729 | |||
| 1730 | /* | ||
| 1731 | * make sure all of our padding and empty slots get zero filled | ||
| 1732 | * regardless of which ones we use today | ||
| 1733 | */ | ||
| 1734 | memset(root_backup, 0, sizeof(*root_backup)); | ||
| 1735 | |||
| 1736 | info->backup_root_index = (next_backup + 1) % BTRFS_NUM_BACKUP_ROOTS; | ||
| 1737 | |||
| 1738 | btrfs_set_backup_tree_root(root_backup, info->tree_root->node->start); | ||
| 1739 | btrfs_set_backup_tree_root_gen(root_backup, | ||
| 1740 | btrfs_header_generation(info->tree_root->node)); | ||
| 1741 | |||
| 1742 | btrfs_set_backup_tree_root_level(root_backup, | ||
| 1743 | btrfs_header_level(info->tree_root->node)); | ||
| 1744 | |||
| 1745 | btrfs_set_backup_chunk_root(root_backup, info->chunk_root->node->start); | ||
| 1746 | btrfs_set_backup_chunk_root_gen(root_backup, | ||
| 1747 | btrfs_header_generation(info->chunk_root->node)); | ||
| 1748 | btrfs_set_backup_chunk_root_level(root_backup, | ||
| 1749 | btrfs_header_level(info->chunk_root->node)); | ||
| 1750 | |||
| 1751 | btrfs_set_backup_extent_root(root_backup, info->extent_root->node->start); | ||
| 1752 | btrfs_set_backup_extent_root_gen(root_backup, | ||
| 1753 | btrfs_header_generation(info->extent_root->node)); | ||
| 1754 | btrfs_set_backup_extent_root_level(root_backup, | ||
| 1755 | btrfs_header_level(info->extent_root->node)); | ||
| 1756 | |||
| 1757 | /* | ||
| 1758 | * we might commit during log recovery, which happens before we set | ||
| 1759 | * the fs_root. Make sure it is valid before we fill it in. | ||
| 1760 | */ | ||
| 1761 | if (info->fs_root && info->fs_root->node) { | ||
| 1762 | btrfs_set_backup_fs_root(root_backup, | ||
| 1763 | info->fs_root->node->start); | ||
| 1764 | btrfs_set_backup_fs_root_gen(root_backup, | ||
| 1765 | btrfs_header_generation(info->fs_root->node)); | ||
| 1766 | btrfs_set_backup_fs_root_level(root_backup, | ||
| 1767 | btrfs_header_level(info->fs_root->node)); | ||
| 1768 | } | ||
| 1769 | |||
| 1770 | btrfs_set_backup_dev_root(root_backup, info->dev_root->node->start); | ||
| 1771 | btrfs_set_backup_dev_root_gen(root_backup, | ||
| 1772 | btrfs_header_generation(info->dev_root->node)); | ||
| 1773 | btrfs_set_backup_dev_root_level(root_backup, | ||
| 1774 | btrfs_header_level(info->dev_root->node)); | ||
| 1775 | |||
| 1776 | btrfs_set_backup_csum_root(root_backup, info->csum_root->node->start); | ||
| 1777 | btrfs_set_backup_csum_root_gen(root_backup, | ||
| 1778 | btrfs_header_generation(info->csum_root->node)); | ||
| 1779 | btrfs_set_backup_csum_root_level(root_backup, | ||
| 1780 | btrfs_header_level(info->csum_root->node)); | ||
| 1781 | |||
| 1782 | btrfs_set_backup_total_bytes(root_backup, | ||
| 1783 | btrfs_super_total_bytes(info->super_copy)); | ||
| 1784 | btrfs_set_backup_bytes_used(root_backup, | ||
| 1785 | btrfs_super_bytes_used(info->super_copy)); | ||
| 1786 | btrfs_set_backup_num_devices(root_backup, | ||
| 1787 | btrfs_super_num_devices(info->super_copy)); | ||
| 1788 | |||
| 1789 | /* | ||
| 1790 | * if we don't copy this out to the super_copy, it won't get remembered | ||
| 1791 | * for the next commit | ||
| 1792 | */ | ||
| 1793 | memcpy(&info->super_copy->super_roots, | ||
| 1794 | &info->super_for_commit->super_roots, | ||
| 1795 | sizeof(*root_backup) * BTRFS_NUM_BACKUP_ROOTS); | ||
| 1796 | } | ||
| 1797 | |||
| 1798 | /* | ||
| 1799 | * this copies info out of the root backup array and back into | ||
| 1800 | * the in-memory super block. It is meant to help iterate through | ||
| 1801 | * the array, so you send it the number of backups you've already | ||
| 1802 | * tried and the last backup index you used. | ||
| 1803 | * | ||
| 1804 | * this returns -1 when it has tried all the backups | ||
| 1805 | */ | ||
| 1806 | static noinline int next_root_backup(struct btrfs_fs_info *info, | ||
| 1807 | struct btrfs_super_block *super, | ||
| 1808 | int *num_backups_tried, int *backup_index) | ||
| 1809 | { | ||
| 1810 | struct btrfs_root_backup *root_backup; | ||
| 1811 | int newest = *backup_index; | ||
| 1812 | |||
| 1813 | if (*num_backups_tried == 0) { | ||
| 1814 | u64 gen = btrfs_super_generation(super); | ||
| 1815 | |||
| 1816 | newest = find_newest_super_backup(info, gen); | ||
| 1817 | if (newest == -1) | ||
| 1818 | return -1; | ||
| 1819 | |||
| 1820 | *backup_index = newest; | ||
| 1821 | *num_backups_tried = 1; | ||
| 1822 | } else if (*num_backups_tried == BTRFS_NUM_BACKUP_ROOTS) { | ||
| 1823 | /* we've tried all the backups, all done */ | ||
| 1824 | return -1; | ||
| 1825 | } else { | ||
| 1826 | /* jump to the next oldest backup */ | ||
| 1827 | newest = (*backup_index + BTRFS_NUM_BACKUP_ROOTS - 1) % | ||
| 1828 | BTRFS_NUM_BACKUP_ROOTS; | ||
| 1829 | *backup_index = newest; | ||
| 1830 | *num_backups_tried += 1; | ||
| 1831 | } | ||
| 1832 | root_backup = super->super_roots + newest; | ||
| 1833 | |||
| 1834 | btrfs_set_super_generation(super, | ||
| 1835 | btrfs_backup_tree_root_gen(root_backup)); | ||
| 1836 | btrfs_set_super_root(super, btrfs_backup_tree_root(root_backup)); | ||
| 1837 | btrfs_set_super_root_level(super, | ||
| 1838 | btrfs_backup_tree_root_level(root_backup)); | ||
| 1839 | btrfs_set_super_bytes_used(super, btrfs_backup_bytes_used(root_backup)); | ||
| 1840 | |||
| 1841 | /* | ||
| 1842 | * FIXME: the total bytes and num_devices need to match, or we should |||
| 1843 | * require a fsck |||
| 1844 | */ | ||
| 1845 | btrfs_set_super_total_bytes(super, btrfs_backup_total_bytes(root_backup)); | ||
| 1846 | btrfs_set_super_num_devices(super, btrfs_backup_num_devices(root_backup)); | ||
| 1847 | return 0; | ||
| 1848 | } | ||
| 1849 | |||
| 1850 | /* helper to cleanup tree roots */ | ||
| 1851 | static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) | ||
| 1852 | { | ||
| 1853 | free_extent_buffer(info->tree_root->node); | ||
| 1854 | free_extent_buffer(info->tree_root->commit_root); | ||
| 1855 | free_extent_buffer(info->dev_root->node); | ||
| 1856 | free_extent_buffer(info->dev_root->commit_root); | ||
| 1857 | free_extent_buffer(info->extent_root->node); | ||
| 1858 | free_extent_buffer(info->extent_root->commit_root); | ||
| 1859 | free_extent_buffer(info->csum_root->node); | ||
| 1860 | free_extent_buffer(info->csum_root->commit_root); | ||
| 1861 | |||
| 1862 | info->tree_root->node = NULL; | ||
| 1863 | info->tree_root->commit_root = NULL; | ||
| 1864 | info->dev_root->node = NULL; | ||
| 1865 | info->dev_root->commit_root = NULL; | ||
| 1866 | info->extent_root->node = NULL; | ||
| 1867 | info->extent_root->commit_root = NULL; | ||
| 1868 | info->csum_root->node = NULL; | ||
| 1869 | info->csum_root->commit_root = NULL; | ||
| 1870 | |||
| 1871 | if (chunk_root) { | ||
| 1872 | free_extent_buffer(info->chunk_root->node); | ||
| 1873 | free_extent_buffer(info->chunk_root->commit_root); | ||
| 1874 | info->chunk_root->node = NULL; | ||
| 1875 | info->chunk_root->commit_root = NULL; | ||
| 1876 | } | ||
| 1877 | } | ||
| 1878 | |||
| 1879 | |||
| 1580 | struct btrfs_root *open_ctree(struct super_block *sb, | 1880 | struct btrfs_root *open_ctree(struct super_block *sb, |
| 1581 | struct btrfs_fs_devices *fs_devices, | 1881 | struct btrfs_fs_devices *fs_devices, |
| 1582 | char *options) | 1882 | char *options) |
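The backup-root helpers above all walk one fixed-size ring: find_newest_super_backup() locates the slot whose generation matches the super block (with an explicit wraparound check), find_oldest_super_backup() parks backup_root_index one slot past it, and next_root_backup() steps backwards through progressively older entries. The index arithmetic in isolation, assuming BTRFS_NUM_BACKUP_ROOTS is 4 as in this series:

#define NUM_ROOTS 4  /* assumption: BTRFS_NUM_BACKUP_ROOTS == 4 */

/* slot to overwrite next: one past the newest entry */
int oldest_slot(int newest)
{
        return (newest + 1) % NUM_ROOTS;
}

/* step from one backup to the next-older one; adding NUM_ROOTS - 1
 * before the modulus avoids a negative remainder in C */
int older_slot(int cur)
{
        return (cur + NUM_ROOTS - 1) % NUM_ROOTS;
}

Because stepping older from slot 0 wraps back to slot 3, next_root_backup() detects exhaustion by counting attempts (num_backups_tried) rather than by comparing indices.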
| @@ -1604,6 +1904,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1604 | 1904 | ||
| 1605 | int ret; | 1905 | int ret; |
| 1606 | int err = -EINVAL; | 1906 | int err = -EINVAL; |
| 1907 | int num_backups_tried = 0; | ||
| 1908 | int backup_index = 0; | ||
| 1607 | 1909 | ||
| 1608 | struct btrfs_super_block *disk_super; | 1910 | struct btrfs_super_block *disk_super; |
| 1609 | 1911 | ||
| @@ -1648,6 +1950,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1648 | spin_lock_init(&fs_info->fs_roots_radix_lock); | 1950 | spin_lock_init(&fs_info->fs_roots_radix_lock); |
| 1649 | spin_lock_init(&fs_info->delayed_iput_lock); | 1951 | spin_lock_init(&fs_info->delayed_iput_lock); |
| 1650 | spin_lock_init(&fs_info->defrag_inodes_lock); | 1952 | spin_lock_init(&fs_info->defrag_inodes_lock); |
| 1953 | spin_lock_init(&fs_info->free_chunk_lock); | ||
| 1651 | mutex_init(&fs_info->reloc_mutex); | 1954 | mutex_init(&fs_info->reloc_mutex); |
| 1652 | 1955 | ||
| 1653 | init_completion(&fs_info->kobj_unregister); | 1956 | init_completion(&fs_info->kobj_unregister); |
| @@ -1665,8 +1968,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1665 | btrfs_init_block_rsv(&fs_info->trans_block_rsv); | 1968 | btrfs_init_block_rsv(&fs_info->trans_block_rsv); |
| 1666 | btrfs_init_block_rsv(&fs_info->chunk_block_rsv); | 1969 | btrfs_init_block_rsv(&fs_info->chunk_block_rsv); |
| 1667 | btrfs_init_block_rsv(&fs_info->empty_block_rsv); | 1970 | btrfs_init_block_rsv(&fs_info->empty_block_rsv); |
| 1668 | INIT_LIST_HEAD(&fs_info->durable_block_rsv_list); | 1971 | btrfs_init_block_rsv(&fs_info->delayed_block_rsv); |
| 1669 | mutex_init(&fs_info->durable_block_rsv_mutex); | ||
| 1670 | atomic_set(&fs_info->nr_async_submits, 0); | 1972 | atomic_set(&fs_info->nr_async_submits, 0); |
| 1671 | atomic_set(&fs_info->async_delalloc_pages, 0); | 1973 | atomic_set(&fs_info->async_delalloc_pages, 0); |
| 1672 | atomic_set(&fs_info->async_submit_draining, 0); | 1974 | atomic_set(&fs_info->async_submit_draining, 0); |
| @@ -1677,6 +1979,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1677 | fs_info->metadata_ratio = 0; | 1979 | fs_info->metadata_ratio = 0; |
| 1678 | fs_info->defrag_inodes = RB_ROOT; | 1980 | fs_info->defrag_inodes = RB_ROOT; |
| 1679 | fs_info->trans_no_join = 0; | 1981 | fs_info->trans_no_join = 0; |
| 1982 | fs_info->free_chunk_space = 0; | ||
| 1983 | |||
| 1984 | /* readahead state */ | ||
| 1985 | INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); | ||
| 1986 | spin_lock_init(&fs_info->reada_lock); | ||
| 1680 | 1987 | ||
| 1681 | fs_info->thread_pool_size = min_t(unsigned long, | 1988 | fs_info->thread_pool_size = min_t(unsigned long, |
| 1682 | num_online_cpus() + 2, 8); | 1989 | num_online_cpus() + 2, 8); |
| @@ -1766,14 +2073,14 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1766 | goto fail_alloc; | 2073 | goto fail_alloc; |
| 1767 | } | 2074 | } |
| 1768 | 2075 | ||
| 1769 | memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); | 2076 | memcpy(fs_info->super_copy, bh->b_data, sizeof(*fs_info->super_copy)); |
| 1770 | memcpy(&fs_info->super_for_commit, &fs_info->super_copy, | 2077 | memcpy(fs_info->super_for_commit, fs_info->super_copy, |
| 1771 | sizeof(fs_info->super_for_commit)); | 2078 | sizeof(*fs_info->super_for_commit)); |
| 1772 | brelse(bh); | 2079 | brelse(bh); |
| 1773 | 2080 | ||
| 1774 | memcpy(fs_info->fsid, fs_info->super_copy.fsid, BTRFS_FSID_SIZE); | 2081 | memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE); |
| 1775 | 2082 | ||
| 1776 | disk_super = &fs_info->super_copy; | 2083 | disk_super = fs_info->super_copy; |
| 1777 | if (!btrfs_super_root(disk_super)) | 2084 | if (!btrfs_super_root(disk_super)) |
| 1778 | goto fail_alloc; | 2085 | goto fail_alloc; |
| 1779 | 2086 | ||
| @@ -1783,6 +2090,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1783 | btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); | 2090 | btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); |
| 1784 | 2091 | ||
| 1785 | /* | 2092 | /* |
| 2093 | * run through our array of backup supers and setup | ||
| 2094 | * our ring pointer to the oldest one | ||
| 2095 | */ | ||
| 2096 | generation = btrfs_super_generation(disk_super); | ||
| 2097 | find_oldest_super_backup(fs_info, generation); | ||
| 2098 | |||
| 2099 | /* | ||
| 1786 | * In the long term, we'll store the compression type in the super | 2100 | * In the long term, we'll store the compression type in the super |
| 1787 | * block, and it'll be used for per file compression control. | 2101 | * block, and it'll be used for per file compression control. |
| 1788 | */ | 2102 | */ |
| @@ -1870,6 +2184,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1870 | btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta", | 2184 | btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta", |
| 1871 | fs_info->thread_pool_size, | 2185 | fs_info->thread_pool_size, |
| 1872 | &fs_info->generic_worker); | 2186 | &fs_info->generic_worker); |
| 2187 | btrfs_init_workers(&fs_info->readahead_workers, "readahead", | ||
| 2188 | fs_info->thread_pool_size, | ||
| 2189 | &fs_info->generic_worker); | ||
| 1873 | 2190 | ||
| 1874 | /* | 2191 | /* |
| 1875 | * endios are largely parallel and should have a very | 2192 | * endios are largely parallel and should have a very |
| @@ -1880,6 +2197,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1880 | 2197 | ||
| 1881 | fs_info->endio_write_workers.idle_thresh = 2; | 2198 | fs_info->endio_write_workers.idle_thresh = 2; |
| 1882 | fs_info->endio_meta_write_workers.idle_thresh = 2; | 2199 | fs_info->endio_meta_write_workers.idle_thresh = 2; |
| 2200 | fs_info->readahead_workers.idle_thresh = 2; | ||
| 1883 | 2201 | ||
| 1884 | btrfs_start_workers(&fs_info->workers, 1); | 2202 | btrfs_start_workers(&fs_info->workers, 1); |
| 1885 | btrfs_start_workers(&fs_info->generic_worker, 1); | 2203 | btrfs_start_workers(&fs_info->generic_worker, 1); |
| @@ -1893,6 +2211,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1893 | btrfs_start_workers(&fs_info->endio_freespace_worker, 1); | 2211 | btrfs_start_workers(&fs_info->endio_freespace_worker, 1); |
| 1894 | btrfs_start_workers(&fs_info->delayed_workers, 1); | 2212 | btrfs_start_workers(&fs_info->delayed_workers, 1); |
| 1895 | btrfs_start_workers(&fs_info->caching_workers, 1); | 2213 | btrfs_start_workers(&fs_info->caching_workers, 1); |
| 2214 | btrfs_start_workers(&fs_info->readahead_workers, 1); | ||
| 1896 | 2215 | ||
| 1897 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); | 2216 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); |
| 1898 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, | 2217 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, |
| @@ -1939,7 +2258,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1939 | if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { | 2258 | if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { |
| 1940 | printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n", | 2259 | printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n", |
| 1941 | sb->s_id); | 2260 | sb->s_id); |
| 1942 | goto fail_chunk_root; | 2261 | goto fail_tree_roots; |
| 1943 | } | 2262 | } |
| 1944 | btrfs_set_root_node(&chunk_root->root_item, chunk_root->node); | 2263 | btrfs_set_root_node(&chunk_root->root_item, chunk_root->node); |
| 1945 | chunk_root->commit_root = btrfs_root_node(chunk_root); | 2264 | chunk_root->commit_root = btrfs_root_node(chunk_root); |
| @@ -1954,11 +2273,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1954 | if (ret) { | 2273 | if (ret) { |
| 1955 | printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n", | 2274 | printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n", |
| 1956 | sb->s_id); | 2275 | sb->s_id); |
| 1957 | goto fail_chunk_root; | 2276 | goto fail_tree_roots; |
| 1958 | } | 2277 | } |
| 1959 | 2278 | ||
| 1960 | btrfs_close_extra_devices(fs_devices); | 2279 | btrfs_close_extra_devices(fs_devices); |
| 1961 | 2280 | ||
| 2281 | retry_root_backup: | ||
| 1962 | blocksize = btrfs_level_size(tree_root, | 2282 | blocksize = btrfs_level_size(tree_root, |
| 1963 | btrfs_super_root_level(disk_super)); | 2283 | btrfs_super_root_level(disk_super)); |
| 1964 | generation = btrfs_super_generation(disk_super); | 2284 | generation = btrfs_super_generation(disk_super); |
| @@ -1966,32 +2286,33 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1966 | tree_root->node = read_tree_block(tree_root, | 2286 | tree_root->node = read_tree_block(tree_root, |
| 1967 | btrfs_super_root(disk_super), | 2287 | btrfs_super_root(disk_super), |
| 1968 | blocksize, generation); | 2288 | blocksize, generation); |
| 1969 | if (!tree_root->node) | 2289 | if (!tree_root->node || |
| 1970 | goto fail_chunk_root; | 2290 | !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) { |
| 1971 | if (!test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) { | ||
| 1972 | printk(KERN_WARNING "btrfs: failed to read tree root on %s\n", | 2291 | printk(KERN_WARNING "btrfs: failed to read tree root on %s\n", |
| 1973 | sb->s_id); | 2292 | sb->s_id); |
| 1974 | goto fail_tree_root; | 2293 | |
| 2294 | goto recovery_tree_root; | ||
| 1975 | } | 2295 | } |
| 2296 | |||
| 1976 | btrfs_set_root_node(&tree_root->root_item, tree_root->node); | 2297 | btrfs_set_root_node(&tree_root->root_item, tree_root->node); |
| 1977 | tree_root->commit_root = btrfs_root_node(tree_root); | 2298 | tree_root->commit_root = btrfs_root_node(tree_root); |
| 1978 | 2299 | ||
| 1979 | ret = find_and_setup_root(tree_root, fs_info, | 2300 | ret = find_and_setup_root(tree_root, fs_info, |
| 1980 | BTRFS_EXTENT_TREE_OBJECTID, extent_root); | 2301 | BTRFS_EXTENT_TREE_OBJECTID, extent_root); |
| 1981 | if (ret) | 2302 | if (ret) |
| 1982 | goto fail_tree_root; | 2303 | goto recovery_tree_root; |
| 1983 | extent_root->track_dirty = 1; | 2304 | extent_root->track_dirty = 1; |
| 1984 | 2305 | ||
| 1985 | ret = find_and_setup_root(tree_root, fs_info, | 2306 | ret = find_and_setup_root(tree_root, fs_info, |
| 1986 | BTRFS_DEV_TREE_OBJECTID, dev_root); | 2307 | BTRFS_DEV_TREE_OBJECTID, dev_root); |
| 1987 | if (ret) | 2308 | if (ret) |
| 1988 | goto fail_extent_root; | 2309 | goto recovery_tree_root; |
| 1989 | dev_root->track_dirty = 1; | 2310 | dev_root->track_dirty = 1; |
| 1990 | 2311 | ||
| 1991 | ret = find_and_setup_root(tree_root, fs_info, | 2312 | ret = find_and_setup_root(tree_root, fs_info, |
| 1992 | BTRFS_CSUM_TREE_OBJECTID, csum_root); | 2313 | BTRFS_CSUM_TREE_OBJECTID, csum_root); |
| 1993 | if (ret) | 2314 | if (ret) |
| 1994 | goto fail_dev_root; | 2315 | goto recovery_tree_root; |
| 1995 | 2316 | ||
| 1996 | csum_root->track_dirty = 1; | 2317 | csum_root->track_dirty = 1; |
| 1997 | 2318 | ||
| @@ -2124,22 +2445,13 @@ fail_cleaner: | |||
| 2124 | 2445 | ||
| 2125 | fail_block_groups: | 2446 | fail_block_groups: |
| 2126 | btrfs_free_block_groups(fs_info); | 2447 | btrfs_free_block_groups(fs_info); |
| 2127 | free_extent_buffer(csum_root->node); | 2448 | |
| 2128 | free_extent_buffer(csum_root->commit_root); | 2449 | fail_tree_roots: |
| 2129 | fail_dev_root: | 2450 | free_root_pointers(fs_info, 1); |
| 2130 | free_extent_buffer(dev_root->node); | 2451 | |
| 2131 | free_extent_buffer(dev_root->commit_root); | ||
| 2132 | fail_extent_root: | ||
| 2133 | free_extent_buffer(extent_root->node); | ||
| 2134 | free_extent_buffer(extent_root->commit_root); | ||
| 2135 | fail_tree_root: | ||
| 2136 | free_extent_buffer(tree_root->node); | ||
| 2137 | free_extent_buffer(tree_root->commit_root); | ||
| 2138 | fail_chunk_root: | ||
| 2139 | free_extent_buffer(chunk_root->node); | ||
| 2140 | free_extent_buffer(chunk_root->commit_root); | ||
| 2141 | fail_sb_buffer: | 2452 | fail_sb_buffer: |
| 2142 | btrfs_stop_workers(&fs_info->generic_worker); | 2453 | btrfs_stop_workers(&fs_info->generic_worker); |
| 2454 | btrfs_stop_workers(&fs_info->readahead_workers); | ||
| 2143 | btrfs_stop_workers(&fs_info->fixup_workers); | 2455 | btrfs_stop_workers(&fs_info->fixup_workers); |
| 2144 | btrfs_stop_workers(&fs_info->delalloc_workers); | 2456 | btrfs_stop_workers(&fs_info->delalloc_workers); |
| 2145 | btrfs_stop_workers(&fs_info->workers); | 2457 | btrfs_stop_workers(&fs_info->workers); |
| @@ -2152,7 +2464,6 @@ fail_sb_buffer: | |||
| 2152 | btrfs_stop_workers(&fs_info->delayed_workers); | 2464 | btrfs_stop_workers(&fs_info->delayed_workers); |
| 2153 | btrfs_stop_workers(&fs_info->caching_workers); | 2465 | btrfs_stop_workers(&fs_info->caching_workers); |
| 2154 | fail_alloc: | 2466 | fail_alloc: |
| 2155 | kfree(fs_info->delayed_root); | ||
| 2156 | fail_iput: | 2467 | fail_iput: |
| 2157 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | 2468 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); |
| 2158 | iput(fs_info->btree_inode); | 2469 | iput(fs_info->btree_inode); |
| @@ -2164,13 +2475,27 @@ fail_bdi: | |||
| 2164 | fail_srcu: | 2475 | fail_srcu: |
| 2165 | cleanup_srcu_struct(&fs_info->subvol_srcu); | 2476 | cleanup_srcu_struct(&fs_info->subvol_srcu); |
| 2166 | fail: | 2477 | fail: |
| 2167 | kfree(extent_root); | 2478 | free_fs_info(fs_info); |
| 2168 | kfree(tree_root); | ||
| 2169 | kfree(fs_info); | ||
| 2170 | kfree(chunk_root); | ||
| 2171 | kfree(dev_root); | ||
| 2172 | kfree(csum_root); | ||
| 2173 | return ERR_PTR(err); | 2479 | return ERR_PTR(err); |
| 2480 | |||
| 2481 | recovery_tree_root: | ||
| 2482 | |||
| 2483 | if (!btrfs_test_opt(tree_root, RECOVERY)) | ||
| 2484 | goto fail_tree_roots; | ||
| 2485 | |||
| 2486 | free_root_pointers(fs_info, 0); | ||
| 2487 | |||
| 2488 | /* don't use the log in recovery mode, it won't be valid */ | ||
| 2489 | btrfs_set_super_log_root(disk_super, 0); | ||
| 2490 | |||
| 2491 | /* we can't trust the free space cache either */ | ||
| 2492 | btrfs_set_opt(fs_info->mount_opt, CLEAR_CACHE); | ||
| 2493 | |||
| 2494 | ret = next_root_backup(fs_info, fs_info->super_copy, | ||
| 2495 | &num_backups_tried, &backup_index); | ||
| 2496 | if (ret == -1) | ||
| 2497 | goto fail_block_groups; | ||
| 2498 | goto retry_root_backup; | ||
| 2174 | } | 2499 | } |
| 2175 | 2500 | ||
| 2176 | static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) | 2501 | static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) |
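Taken together, the retry_root_backup label and the recovery_tree_root epilogue above form a bounded retry loop: when a tree root fails to read and the filesystem was mounted with the recovery option, the log root and free-space cache are invalidated, the in-memory super block is rewound to the next-older backup, and root setup is retried until the ring is exhausted. A control-flow sketch of that shape; load_roots() and rewind_super() are hypothetical stand-ins:

int load_roots(void);                    /* stand-in: 0 on success */
int rewind_super(int *tried, int *idx);  /* stand-in: -1 when exhausted */

int open_with_backups(int use_recovery)
{
        int tried = 0, idx = 0;

        for (;;) {
                if (load_roots() == 0)
                        return 0;               /* roots are usable */
                if (!use_recovery)
                        return -1;              /* fail hard without -o recovery */
                if (rewind_super(&tried, &idx))
                        return -1;              /* all backups tried */
                /* loop: retry against the rewound super block */
        }
}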
| @@ -2338,10 +2663,11 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) | |||
| 2338 | int total_errors = 0; | 2663 | int total_errors = 0; |
| 2339 | u64 flags; | 2664 | u64 flags; |
| 2340 | 2665 | ||
| 2341 | max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; | 2666 | max_errors = btrfs_super_num_devices(root->fs_info->super_copy) - 1; |
| 2342 | do_barriers = !btrfs_test_opt(root, NOBARRIER); | 2667 | do_barriers = !btrfs_test_opt(root, NOBARRIER); |
| 2668 | backup_super_roots(root->fs_info); | ||
| 2343 | 2669 | ||
| 2344 | sb = &root->fs_info->super_for_commit; | 2670 | sb = root->fs_info->super_for_commit; |
| 2345 | dev_item = &sb->dev_item; | 2671 | dev_item = &sb->dev_item; |
| 2346 | 2672 | ||
| 2347 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 2673 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); |
| @@ -2545,8 +2871,6 @@ int close_ctree(struct btrfs_root *root) | |||
| 2545 | /* clear out the rbtree of defraggable inodes */ | 2871 | /* clear out the rbtree of defraggable inodes */ |
| 2546 | btrfs_run_defrag_inodes(root->fs_info); | 2872 | btrfs_run_defrag_inodes(root->fs_info); |
| 2547 | 2873 | ||
| 2548 | btrfs_put_block_group_cache(fs_info); | ||
| 2549 | |||
| 2550 | /* | 2874 | /* |
| 2551 | * There are two situations in which a broken btrfs can be flipped read-only: | 2875 | * There are two situations in which a broken btrfs can be flipped read-only: |
| 2552 | * | 2876 | * |
| @@ -2572,6 +2896,8 @@ int close_ctree(struct btrfs_root *root) | |||
| 2572 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | 2896 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); |
| 2573 | } | 2897 | } |
| 2574 | 2898 | ||
| 2899 | btrfs_put_block_group_cache(fs_info); | ||
| 2900 | |||
| 2575 | kthread_stop(root->fs_info->transaction_kthread); | 2901 | kthread_stop(root->fs_info->transaction_kthread); |
| 2576 | kthread_stop(root->fs_info->cleaner_kthread); | 2902 | kthread_stop(root->fs_info->cleaner_kthread); |
| 2577 | 2903 | ||
| @@ -2603,7 +2929,6 @@ int close_ctree(struct btrfs_root *root) | |||
| 2603 | del_fs_roots(fs_info); | 2929 | del_fs_roots(fs_info); |
| 2604 | 2930 | ||
| 2605 | iput(fs_info->btree_inode); | 2931 | iput(fs_info->btree_inode); |
| 2606 | kfree(fs_info->delayed_root); | ||
| 2607 | 2932 | ||
| 2608 | btrfs_stop_workers(&fs_info->generic_worker); | 2933 | btrfs_stop_workers(&fs_info->generic_worker); |
| 2609 | btrfs_stop_workers(&fs_info->fixup_workers); | 2934 | btrfs_stop_workers(&fs_info->fixup_workers); |
| @@ -2617,6 +2942,7 @@ int close_ctree(struct btrfs_root *root) | |||
| 2617 | btrfs_stop_workers(&fs_info->submit_workers); | 2942 | btrfs_stop_workers(&fs_info->submit_workers); |
| 2618 | btrfs_stop_workers(&fs_info->delayed_workers); | 2943 | btrfs_stop_workers(&fs_info->delayed_workers); |
| 2619 | btrfs_stop_workers(&fs_info->caching_workers); | 2944 | btrfs_stop_workers(&fs_info->caching_workers); |
| 2945 | btrfs_stop_workers(&fs_info->readahead_workers); | ||
| 2620 | 2946 | ||
| 2621 | btrfs_close_devices(fs_info->fs_devices); | 2947 | btrfs_close_devices(fs_info->fs_devices); |
| 2622 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2948 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
| @@ -2624,12 +2950,7 @@ int close_ctree(struct btrfs_root *root) | |||
| 2624 | bdi_destroy(&fs_info->bdi); | 2950 | bdi_destroy(&fs_info->bdi); |
| 2625 | cleanup_srcu_struct(&fs_info->subvol_srcu); | 2951 | cleanup_srcu_struct(&fs_info->subvol_srcu); |
| 2626 | 2952 | ||
| 2627 | kfree(fs_info->extent_root); | 2953 | free_fs_info(fs_info); |
| 2628 | kfree(fs_info->tree_root); | ||
| 2629 | kfree(fs_info->chunk_root); | ||
| 2630 | kfree(fs_info->dev_root); | ||
| 2631 | kfree(fs_info->csum_root); | ||
| 2632 | kfree(fs_info); | ||
| 2633 | 2954 | ||
| 2634 | return 0; | 2955 | return 0; |
| 2635 | } | 2956 | } |
| @@ -2735,7 +3056,8 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) | |||
| 2735 | return ret; | 3056 | return ret; |
| 2736 | } | 3057 | } |
| 2737 | 3058 | ||
| 2738 | int btree_lock_page_hook(struct page *page) | 3059 | static int btree_lock_page_hook(struct page *page, void *data, |
| 3060 | void (*flush_fn)(void *)) | ||
| 2739 | { | 3061 | { |
| 2740 | struct inode *inode = page->mapping->host; | 3062 | struct inode *inode = page->mapping->host; |
| 2741 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3063 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| @@ -2752,7 +3074,10 @@ int btree_lock_page_hook(struct page *page) | |||
| 2752 | if (!eb) | 3074 | if (!eb) |
| 2753 | goto out; | 3075 | goto out; |
| 2754 | 3076 | ||
| 2755 | btrfs_tree_lock(eb); | 3077 | if (!btrfs_try_tree_write_lock(eb)) { |
| 3078 | flush_fn(data); | ||
| 3079 | btrfs_tree_lock(eb); | ||
| 3080 | } | ||
| 2756 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); | 3081 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); |
| 2757 | 3082 | ||
| 2758 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { | 3083 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { |
| @@ -2767,7 +3092,10 @@ int btree_lock_page_hook(struct page *page) | |||
| 2767 | btrfs_tree_unlock(eb); | 3092 | btrfs_tree_unlock(eb); |
| 2768 | free_extent_buffer(eb); | 3093 | free_extent_buffer(eb); |
| 2769 | out: | 3094 | out: |
| 2770 | lock_page(page); | 3095 | if (!trylock_page(page)) { |
| 3096 | flush_fn(data); | ||
| 3097 | lock_page(page); | ||
| 3098 | } | ||
| 2771 | return 0; | 3099 | return 0; |
| 2772 | } | 3100 | } |
| 2773 | 3101 | ||
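The reworked hook above avoids a writeback deadlock: rather than blocking on the tree lock or the page lock while bios sit queued but unsubmitted, it trylocks first and, on contention, runs flush_fn to push the pending bios out before blocking for real. A userspace sketch of the pattern with pthreads; flush_pending() is a hypothetical stand-in for the flush callback:

#include <pthread.h>

void flush_pending(void *data);  /* stand-in: submit queued I/O */

void lock_with_flush(pthread_mutex_t *lock, void *data)
{
        /* fast path: take the lock without blocking */
        if (pthread_mutex_trylock(lock) == 0)
                return;
        /* contended: flush work the lock holder may be waiting on,
         * after which it is safe to block */
        flush_pending(data);
        pthread_mutex_lock(lock);
}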
| @@ -3123,6 +3451,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
| 3123 | static struct extent_io_ops btree_extent_io_ops = { | 3451 | static struct extent_io_ops btree_extent_io_ops = { |
| 3124 | .write_cache_pages_lock_hook = btree_lock_page_hook, | 3452 | .write_cache_pages_lock_hook = btree_lock_page_hook, |
| 3125 | .readpage_end_io_hook = btree_readpage_end_io_hook, | 3453 | .readpage_end_io_hook = btree_readpage_end_io_hook, |
| 3454 | .readpage_io_failed_hook = btree_io_failed_hook, | ||
| 3126 | .submit_bio_hook = btree_submit_bio_hook, | 3455 | .submit_bio_hook = btree_submit_bio_hook, |
| 3127 | /* note we're sharing with inode.c for the merge bio hook */ | 3456 | /* note we're sharing with inode.c for the merge bio hook */ |
| 3128 | .merge_bio_hook = btrfs_merge_bio_hook, | 3457 | .merge_bio_hook = btrfs_merge_bio_hook, |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index bec3ea4bd67f..c99d0a8f13fa 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
| @@ -40,6 +40,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, | |||
| 40 | u32 blocksize, u64 parent_transid); | 40 | u32 blocksize, u64 parent_transid); |
| 41 | int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, | 41 | int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, |
| 42 | u64 parent_transid); | 42 | u64 parent_transid); |
| 43 | int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, | ||
| 44 | int mirror_num, struct extent_buffer **eb); | ||
| 43 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, | 45 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, |
| 44 | u64 bytenr, u32 blocksize); | 46 | u64 bytenr, u32 blocksize); |
| 45 | int clean_tree_block(struct btrfs_trans_handle *trans, | 47 | int clean_tree_block(struct btrfs_trans_handle *trans, |
| @@ -83,8 +85,6 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, | |||
| 83 | struct btrfs_fs_info *fs_info); | 85 | struct btrfs_fs_info *fs_info); |
| 84 | int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | 86 | int btrfs_add_log_tree(struct btrfs_trans_handle *trans, |
| 85 | struct btrfs_root *root); | 87 | struct btrfs_root *root); |
| 86 | int btree_lock_page_hook(struct page *page); | ||
| 87 | |||
| 88 | 88 | ||
| 89 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 89 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
| 90 | void btrfs_init_lockdep(void); | 90 | void btrfs_init_lockdep(void); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c9ee0e18bbdc..9879bd474632 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -23,6 +23,7 @@ | |||
| 23 | #include <linux/rcupdate.h> | 23 | #include <linux/rcupdate.h> |
| 24 | #include <linux/kthread.h> | 24 | #include <linux/kthread.h> |
| 25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
| 26 | #include <linux/ratelimit.h> | ||
| 26 | #include "compat.h" | 27 | #include "compat.h" |
| 27 | #include "hash.h" | 28 | #include "hash.h" |
| 28 | #include "ctree.h" | 29 | #include "ctree.h" |
| @@ -52,6 +53,21 @@ enum { | |||
| 52 | CHUNK_ALLOC_LIMITED = 2, | 53 | CHUNK_ALLOC_LIMITED = 2, |
| 53 | }; | 54 | }; |
| 54 | 55 | ||
| 56 | /* | ||
| 57 | * Control how reservations are dealt with. | ||
| 58 | * | ||
| 59 | * RESERVE_FREE - freeing a reservation. | ||
| 60 | * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for | ||
| 61 | * ENOSPC accounting | ||
| 62 | * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update | ||
| 63 | * bytes_may_use as the ENOSPC accounting is done elsewhere | ||
| 64 | */ | ||
| 65 | enum { | ||
| 66 | RESERVE_FREE = 0, | ||
| 67 | RESERVE_ALLOC = 1, | ||
| 68 | RESERVE_ALLOC_NO_ACCOUNT = 2, | ||
| 69 | }; | ||
| 70 | |||
| 55 | static int update_block_group(struct btrfs_trans_handle *trans, | 71 | static int update_block_group(struct btrfs_trans_handle *trans, |
| 56 | struct btrfs_root *root, | 72 | struct btrfs_root *root, |
| 57 | u64 bytenr, u64 num_bytes, int alloc); | 73 | u64 bytenr, u64 num_bytes, int alloc); |
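btrfs_update_reserved_bytes(), made static by this patch, dispatches on the enum above. Its body is not part of this hunk, so the following is only a sketch of the accounting the three modes imply, on simplified types: an alloc converts part of the optimistic bytes_may_use estimate into a hard reservation, the no-account variant skips the estimate because ENOSPC accounting already happened elsewhere, and a free releases the reservation.

struct space_acct {
        unsigned long long reserved;       /* bytes pinned by allocations */
        unsigned long long bytes_may_use;  /* optimistic ENOSPC estimate */
};

enum { RESERVE_FREE = 0, RESERVE_ALLOC = 1, RESERVE_ALLOC_NO_ACCOUNT = 2 };

void update_reserved(struct space_acct *s, unsigned long long n, int mode)
{
        switch (mode) {
        case RESERVE_ALLOC:
                s->bytes_may_use -= n;  /* estimate becomes a reservation */
                /* fall through */
        case RESERVE_ALLOC_NO_ACCOUNT:
                s->reserved += n;
                break;
        case RESERVE_FREE:
                s->reserved -= n;       /* reservation released */
                break;
        }
}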
| @@ -81,6 +97,8 @@ static int find_next_key(struct btrfs_path *path, int level, | |||
| 81 | struct btrfs_key *key); | 97 | struct btrfs_key *key); |
| 82 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | 98 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, |
| 83 | int dump_block_groups); | 99 | int dump_block_groups); |
| 100 | static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, | ||
| 101 | u64 num_bytes, int reserve); | ||
| 84 | 102 | ||
| 85 | static noinline int | 103 | static noinline int |
| 86 | block_group_cache_done(struct btrfs_block_group_cache *cache) | 104 | block_group_cache_done(struct btrfs_block_group_cache *cache) |
| @@ -104,7 +122,6 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache) | |||
| 104 | if (atomic_dec_and_test(&cache->count)) { | 122 | if (atomic_dec_and_test(&cache->count)) { |
| 105 | WARN_ON(cache->pinned > 0); | 123 | WARN_ON(cache->pinned > 0); |
| 106 | WARN_ON(cache->reserved > 0); | 124 | WARN_ON(cache->reserved > 0); |
| 107 | WARN_ON(cache->reserved_pinned > 0); | ||
| 108 | kfree(cache->free_space_ctl); | 125 | kfree(cache->free_space_ctl); |
| 109 | kfree(cache); | 126 | kfree(cache); |
| 110 | } | 127 | } |
| @@ -465,7 +482,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
| 465 | * we likely hold important locks. | 482 | * we likely hold important locks. |
| 466 | */ | 483 | */ |
| 467 | if (trans && (!trans->transaction->in_commit) && | 484 | if (trans && (!trans->transaction->in_commit) && |
| 468 | (root && root != root->fs_info->tree_root)) { | 485 | (root && root != root->fs_info->tree_root) && |
| 486 | btrfs_test_opt(root, SPACE_CACHE)) { | ||
| 469 | spin_lock(&cache->lock); | 487 | spin_lock(&cache->lock); |
| 470 | if (cache->cached != BTRFS_CACHE_NO) { | 488 | if (cache->cached != BTRFS_CACHE_NO) { |
| 471 | spin_unlock(&cache->lock); | 489 | spin_unlock(&cache->lock); |
| @@ -1770,18 +1788,18 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
| 1770 | { | 1788 | { |
| 1771 | int ret; | 1789 | int ret; |
| 1772 | u64 discarded_bytes = 0; | 1790 | u64 discarded_bytes = 0; |
| 1773 | struct btrfs_multi_bio *multi = NULL; | 1791 | struct btrfs_bio *bbio = NULL; |
| 1774 | 1792 | ||
| 1775 | 1793 | ||
| 1776 | /* Tell the block device(s) that the sectors can be discarded */ | 1794 | /* Tell the block device(s) that the sectors can be discarded */ |
| 1777 | ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD, | 1795 | ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD, |
| 1778 | bytenr, &num_bytes, &multi, 0); | 1796 | bytenr, &num_bytes, &bbio, 0); |
| 1779 | if (!ret) { | 1797 | if (!ret) { |
| 1780 | struct btrfs_bio_stripe *stripe = multi->stripes; | 1798 | struct btrfs_bio_stripe *stripe = bbio->stripes; |
| 1781 | int i; | 1799 | int i; |
| 1782 | 1800 | ||
| 1783 | 1801 | ||
| 1784 | for (i = 0; i < multi->num_stripes; i++, stripe++) { | 1802 | for (i = 0; i < bbio->num_stripes; i++, stripe++) { |
| 1785 | if (!stripe->dev->can_discard) | 1803 | if (!stripe->dev->can_discard) |
| 1786 | continue; | 1804 | continue; |
| 1787 | 1805 | ||
| @@ -1800,7 +1818,7 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
| 1800 | */ | 1818 | */ |
| 1801 | ret = 0; | 1819 | ret = 0; |
| 1802 | } | 1820 | } |
| 1803 | kfree(multi); | 1821 | kfree(bbio); |
| 1804 | } | 1822 | } |
| 1805 | 1823 | ||
| 1806 | if (actual_bytes) | 1824 | if (actual_bytes) |
| @@ -2700,6 +2718,13 @@ again: | |||
| 2700 | goto again; | 2718 | goto again; |
| 2701 | } | 2719 | } |
| 2702 | 2720 | ||
| 2721 | /* We've already set up this transaction, go ahead and exit */ | ||
| 2722 | if (block_group->cache_generation == trans->transid && | ||
| 2723 | i_size_read(inode)) { | ||
| 2724 | dcs = BTRFS_DC_SETUP; | ||
| 2725 | goto out_put; | ||
| 2726 | } | ||
| 2727 | |||
| 2703 | /* | 2728 | /* |
| 2704 | * We want to set the generation to 0, that way if anything goes wrong | 2729 | * We want to set the generation to 0, that way if anything goes wrong |
| 2705 | * from here on out we know not to trust this cache when we load up next | 2730 | * from here on out we know not to trust this cache when we load up next |
| @@ -2749,12 +2774,15 @@ again: | |||
| 2749 | if (!ret) | 2774 | if (!ret) |
| 2750 | dcs = BTRFS_DC_SETUP; | 2775 | dcs = BTRFS_DC_SETUP; |
| 2751 | btrfs_free_reserved_data_space(inode, num_pages); | 2776 | btrfs_free_reserved_data_space(inode, num_pages); |
| 2777 | |||
| 2752 | out_put: | 2778 | out_put: |
| 2753 | iput(inode); | 2779 | iput(inode); |
| 2754 | out_free: | 2780 | out_free: |
| 2755 | btrfs_release_path(path); | 2781 | btrfs_release_path(path); |
| 2756 | out: | 2782 | out: |
| 2757 | spin_lock(&block_group->lock); | 2783 | spin_lock(&block_group->lock); |
| 2784 | if (!ret) | ||
| 2785 | block_group->cache_generation = trans->transid; | ||
| 2758 | block_group->disk_cache_state = dcs; | 2786 | block_group->disk_cache_state = dcs; |
| 2759 | spin_unlock(&block_group->lock); | 2787 | spin_unlock(&block_group->lock); |
| 2760 | 2788 | ||
| @@ -3122,16 +3150,13 @@ commit_trans: | |||
| 3122 | return -ENOSPC; | 3150 | return -ENOSPC; |
| 3123 | } | 3151 | } |
| 3124 | data_sinfo->bytes_may_use += bytes; | 3152 | data_sinfo->bytes_may_use += bytes; |
| 3125 | BTRFS_I(inode)->reserved_bytes += bytes; | ||
| 3126 | spin_unlock(&data_sinfo->lock); | 3153 | spin_unlock(&data_sinfo->lock); |
| 3127 | 3154 | ||
| 3128 | return 0; | 3155 | return 0; |
| 3129 | } | 3156 | } |
| 3130 | 3157 | ||
| 3131 | /* | 3158 | /* |
| 3132 | * called when we are clearing a delalloc extent from the | 3159 | * Called if we need to clear a data reservation for this inode. |
| 3133 | * inode's io_tree or there was an error for whatever reason | ||
| 3134 | * after calling btrfs_check_data_free_space | ||
| 3135 | */ | 3160 | */ |
| 3136 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) | 3161 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) |
| 3137 | { | 3162 | { |
| @@ -3144,7 +3169,6 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) | |||
| 3144 | data_sinfo = BTRFS_I(inode)->space_info; | 3169 | data_sinfo = BTRFS_I(inode)->space_info; |
| 3145 | spin_lock(&data_sinfo->lock); | 3170 | spin_lock(&data_sinfo->lock); |
| 3146 | data_sinfo->bytes_may_use -= bytes; | 3171 | data_sinfo->bytes_may_use -= bytes; |
| 3147 | BTRFS_I(inode)->reserved_bytes -= bytes; | ||
| 3148 | spin_unlock(&data_sinfo->lock); | 3172 | spin_unlock(&data_sinfo->lock); |
| 3149 | } | 3173 | } |
| 3150 | 3174 | ||
| @@ -3165,6 +3189,7 @@ static int should_alloc_chunk(struct btrfs_root *root, | |||
| 3165 | struct btrfs_space_info *sinfo, u64 alloc_bytes, | 3189 | struct btrfs_space_info *sinfo, u64 alloc_bytes, |
| 3166 | int force) | 3190 | int force) |
| 3167 | { | 3191 | { |
| 3192 | struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; | ||
| 3168 | u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; | 3193 | u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; |
| 3169 | u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved; | 3194 | u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved; |
| 3170 | u64 thresh; | 3195 | u64 thresh; |
| @@ -3173,11 +3198,18 @@ static int should_alloc_chunk(struct btrfs_root *root, | |||
| 3173 | return 1; | 3198 | return 1; |
| 3174 | 3199 | ||
| 3175 | /* | 3200 | /* |
| 3201 | * We need to take into account the global rsv because for all intents | ||
| 3202 | * and purposes it's used space. Don't worry about locking the | ||
| 3203 | * global_rsv, it doesn't change except when the transaction commits. | ||
| 3204 | */ | ||
| 3205 | num_allocated += global_rsv->size; | ||
| 3206 | |||
| 3207 | /* | ||
| 3176 | * in limited mode, we want to have some free space up to | 3208 | * in limited mode, we want to have some free space up to |
| 3177 | * about 1% of the FS size. | 3209 | * about 1% of the FS size. |
| 3178 | */ | 3210 | */ |
| 3179 | if (force == CHUNK_ALLOC_LIMITED) { | 3211 | if (force == CHUNK_ALLOC_LIMITED) { |
| 3180 | thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); | 3212 | thresh = btrfs_super_total_bytes(root->fs_info->super_copy); |
| 3181 | thresh = max_t(u64, 64 * 1024 * 1024, | 3213 | thresh = max_t(u64, 64 * 1024 * 1024, |
| 3182 | div_factor_fine(thresh, 1)); | 3214 | div_factor_fine(thresh, 1)); |
| 3183 | 3215 | ||
| @@ -3199,7 +3231,7 @@ static int should_alloc_chunk(struct btrfs_root *root, | |||
| 3199 | if (num_allocated + alloc_bytes < div_factor(num_bytes, 8)) | 3231 | if (num_allocated + alloc_bytes < div_factor(num_bytes, 8)) |
| 3200 | return 0; | 3232 | return 0; |
| 3201 | 3233 | ||
| 3202 | thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); | 3234 | thresh = btrfs_super_total_bytes(root->fs_info->super_copy); |
| 3203 | 3235 | ||
| 3204 | /* 256MB or 5% of the FS */ | 3236 | /* 256MB or 5% of the FS */ |
| 3205 | thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); | 3237 | thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); |
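
The two chunk-allocation thresholds above reduce to simple arithmetic, with the global reserve counted as allocated space before either check runs. A standalone sketch of that math (a minimal illustration, assuming plain C helpers in place of the kernel's max_t() and div_factor_fine()):

    #include <stdint.h>

    static uint64_t max_u64(uint64_t a, uint64_t b) { return a > b ? a : b; }

    /* CHUNK_ALLOC_LIMITED: keep at least ~1% of the FS free, floor 64MB */
    static uint64_t limited_thresh(uint64_t fs_total_bytes)
    {
            return max_u64(64ULL << 20, fs_total_bytes / 100);
    }

    /* the normal path: allocate while under ~5% of the FS, floor 256MB */
    static uint64_t normal_thresh(uint64_t fs_total_bytes)
    {
            return max_u64(256ULL << 20, fs_total_bytes * 5 / 100);
    }

On a 1TB filesystem these come out near 10GB and 50GB respectively, so the fixed 64MB/256MB floors only dominate on small filesystems.
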
| @@ -3302,24 +3334,26 @@ out: | |||
| 3302 | /* | 3334 | /* |
| 3303 | * shrink metadata reservation for delalloc | 3335 | * shrink metadata reservation for delalloc |
| 3304 | */ | 3336 | */ |
| 3305 | static int shrink_delalloc(struct btrfs_trans_handle *trans, | 3337 | static int shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, |
| 3306 | struct btrfs_root *root, u64 to_reclaim, int sync) | 3338 | bool wait_ordered) |
| 3307 | { | 3339 | { |
| 3308 | struct btrfs_block_rsv *block_rsv; | 3340 | struct btrfs_block_rsv *block_rsv; |
| 3309 | struct btrfs_space_info *space_info; | 3341 | struct btrfs_space_info *space_info; |
| 3342 | struct btrfs_trans_handle *trans; | ||
| 3310 | u64 reserved; | 3343 | u64 reserved; |
| 3311 | u64 max_reclaim; | 3344 | u64 max_reclaim; |
| 3312 | u64 reclaimed = 0; | 3345 | u64 reclaimed = 0; |
| 3313 | long time_left; | 3346 | long time_left; |
| 3314 | int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; | 3347 | unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; |
| 3315 | int loops = 0; | 3348 | int loops = 0; |
| 3316 | unsigned long progress; | 3349 | unsigned long progress; |
| 3317 | 3350 | ||
| 3351 | trans = (struct btrfs_trans_handle *)current->journal_info; | ||
| 3318 | block_rsv = &root->fs_info->delalloc_block_rsv; | 3352 | block_rsv = &root->fs_info->delalloc_block_rsv; |
| 3319 | space_info = block_rsv->space_info; | 3353 | space_info = block_rsv->space_info; |
| 3320 | 3354 | ||
| 3321 | smp_mb(); | 3355 | smp_mb(); |
| 3322 | reserved = space_info->bytes_reserved; | 3356 | reserved = space_info->bytes_may_use; |
| 3323 | progress = space_info->reservation_progress; | 3357 | progress = space_info->reservation_progress; |
| 3324 | 3358 | ||
| 3325 | if (reserved == 0) | 3359 | if (reserved == 0) |
| @@ -3334,7 +3368,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, | |||
| 3334 | } | 3368 | } |
| 3335 | 3369 | ||
| 3336 | max_reclaim = min(reserved, to_reclaim); | 3370 | max_reclaim = min(reserved, to_reclaim); |
| 3337 | 3371 | nr_pages = max_t(unsigned long, nr_pages, |
| 3372 | max_reclaim >> PAGE_CACHE_SHIFT); | ||
| 3338 | while (loops < 1024) { | 3373 | while (loops < 1024) { |
| 3339 | /* have the flusher threads jump in and do some IO */ | 3374 | /* have the flusher threads jump in and do some IO */ |
| 3340 | smp_mb(); | 3375 | smp_mb(); |
| @@ -3344,9 +3379,9 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, | |||
| 3344 | WB_REASON_FS_FREE_SPACE); | 3379 | WB_REASON_FS_FREE_SPACE); |
| 3345 | 3380 | ||
| 3346 | spin_lock(&space_info->lock); | 3381 | spin_lock(&space_info->lock); |
| 3347 | if (reserved > space_info->bytes_reserved) | 3382 | if (reserved > space_info->bytes_may_use) |
| 3348 | reclaimed += reserved - space_info->bytes_reserved; | 3383 | reclaimed += reserved - space_info->bytes_may_use; |
| 3349 | reserved = space_info->bytes_reserved; | 3384 | reserved = space_info->bytes_may_use; |
| 3350 | spin_unlock(&space_info->lock); | 3385 | spin_unlock(&space_info->lock); |
| 3351 | 3386 | ||
| 3352 | loops++; | 3387 | loops++; |
| @@ -3357,11 +3392,15 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, | |||
| 3357 | if (trans && trans->transaction->blocked) | 3392 | if (trans && trans->transaction->blocked) |
| 3358 | return -EAGAIN; | 3393 | return -EAGAIN; |
| 3359 | 3394 | ||
| 3360 | time_left = schedule_timeout_interruptible(1); | 3395 | if (wait_ordered && !trans) { |
| 3396 | btrfs_wait_ordered_extents(root, 0, 0); | ||
| 3397 | } else { | ||
| 3398 | time_left = schedule_timeout_interruptible(1); | ||
| 3361 | 3399 | ||
| 3362 | /* We were interrupted, exit */ | 3400 | /* We were interrupted, exit */ |
| 3363 | if (time_left) | 3401 | if (time_left) |
| 3364 | break; | 3402 | break; |
| 3403 | } | ||
| 3365 | 3404 | ||
| 3366 | /* we've kicked the IO a few times, if anything has been freed, | 3405 | /* we've kicked the IO a few times, if anything has been freed, |
| 3367 | * exit. There is no sense in looping here for a long time | 3406 | * exit. There is no sense in looping here for a long time |
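
The reworked loop above sizes the writeback kick to the reclaim target (nr_pages now grows with max_reclaim instead of staying at a fixed 2MB worth of pages), measures progress against bytes_may_use, and waits on ordered extents instead of sleeping when no transaction is open. A userspace-flavoured sketch of that kick-and-measure shape; kick_flusher(), sample_reserved() and wait_for_ordered_io() are assumed stand-ins for writeback_inodes_sb_nr_if_idle(), the space_info counter and btrfs_wait_ordered_extents(), not real APIs:

    extern void kick_flusher(unsigned long nr_pages);
    extern unsigned long long sample_reserved(void);
    extern void wait_for_ordered_io(void);

    static int shrink_sketch(unsigned long long to_reclaim, int have_trans)
    {
            unsigned long long reserved = sample_reserved();
            unsigned long long reclaimed = 0;
            unsigned long nr_pages;
            int loops;

            if (reserved == 0)
                    return 1;
            if (to_reclaim > reserved)
                    to_reclaim = reserved;

            /* at least the old 2MB kick, but scaled to the target */
            nr_pages = to_reclaim >> 12;            /* assumes 4K pages */
            if (nr_pages < 512)
                    nr_pages = 512;

            for (loops = 0; loops < 1024 && reclaimed < to_reclaim; loops++) {
                    unsigned long long now;

                    kick_flusher(nr_pages);

                    now = sample_reserved();
                    if (now < reserved)
                            reclaimed += reserved - now;
                    reserved = now;

                    /* without an open transaction it is safe to wait on
                     * ordered IO; otherwise the real loop sleeps a jiffy
                     * and bails once progress stalls */
                    if (!have_trans)
                            wait_for_ordered_io();
            }
            return reclaimed >= to_reclaim;
    }
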
| @@ -3376,34 +3415,90 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, | |||
| 3376 | } | 3415 | } |
| 3377 | 3416 | ||
| 3378 | } | 3417 | } |
| 3379 | if (reclaimed >= to_reclaim && !trans) | 3418 | |
| 3380 | btrfs_wait_ordered_extents(root, 0, 0); | ||
| 3381 | return reclaimed >= to_reclaim; | 3419 | return reclaimed >= to_reclaim; |
| 3382 | } | 3420 | } |
| 3383 | 3421 | ||
| 3384 | /* | 3422 | /** |
| 3385 | * Retries tells us how many times we've called reserve_metadata_bytes. The | 3423 | * may_commit_transaction - possibly commit the transaction if it's ok to |
| 3386 | * idea is if this is the first call (retries == 0) then we will add to our | 3424 | * @root - the root we're allocating for |
| 3387 | * reserved count if we can't make the allocation in order to hold our place | 3425 | * @bytes - the number of bytes we want to reserve |
| 3388 | * while we go and try and free up space. That way for retries > 1 we don't try | 3426 | * @force - force the commit |
| 3389 | * and add space, we just check to see if the amount of unused space is >= the | ||
| 3390 | * total space, meaning that our reservation is valid. | ||
| 3391 | * | 3427 | * |
| 3392 | * However if we don't intend to retry this reservation, pass -1 as retries so | 3428 | * This will check to make sure that committing the transaction will actually |
| 3393 | * that it short circuits this logic. | 3429 | * get us somewhere and then commit the transaction if it does. Otherwise it |
| 3430 | * will return -ENOSPC. | ||
| 3394 | */ | 3431 | */ |
| 3395 | static int reserve_metadata_bytes(struct btrfs_trans_handle *trans, | 3432 | static int may_commit_transaction(struct btrfs_root *root, |
| 3396 | struct btrfs_root *root, | 3433 | struct btrfs_space_info *space_info, |
| 3434 | u64 bytes, int force) | ||
| 3435 | { | ||
| 3436 | struct btrfs_block_rsv *delayed_rsv = &root->fs_info->delayed_block_rsv; | ||
| 3437 | struct btrfs_trans_handle *trans; | ||
| 3438 | |||
| 3439 | trans = (struct btrfs_trans_handle *)current->journal_info; | ||
| 3440 | if (trans) | ||
| 3441 | return -EAGAIN; | ||
| 3442 | |||
| 3443 | if (force) | ||
| 3444 | goto commit; | ||
| 3445 | |||
| 3446 | /* See if there is enough pinned space to make this reservation */ | ||
| 3447 | spin_lock(&space_info->lock); | ||
| 3448 | if (space_info->bytes_pinned >= bytes) { | ||
| 3449 | spin_unlock(&space_info->lock); | ||
| 3450 | goto commit; | ||
| 3451 | } | ||
| 3452 | spin_unlock(&space_info->lock); | ||
| 3453 | |||
| 3454 | /* | ||
| 3455 | * See if there is some space in the delayed insertion reservation for | ||
| 3456 | * this reservation. | ||
| 3457 | */ | ||
| 3458 | if (space_info != delayed_rsv->space_info) | ||
| 3459 | return -ENOSPC; | ||
| 3460 | |||
| 3461 | spin_lock(&delayed_rsv->lock); | ||
| 3462 | if (delayed_rsv->size < bytes) { | ||
| 3463 | spin_unlock(&delayed_rsv->lock); | ||
| 3464 | return -ENOSPC; | ||
| 3465 | } | ||
| 3466 | spin_unlock(&delayed_rsv->lock); | ||
| 3467 | |||
| 3468 | commit: | ||
| 3469 | trans = btrfs_join_transaction(root); | ||
| 3470 | if (IS_ERR(trans)) | ||
| 3471 | return -ENOSPC; | ||
| 3472 | |||
| 3473 | return btrfs_commit_transaction(trans, root); | ||
| 3474 | } | ||
| 3475 | |||
| 3476 | /** | ||
| 3477 | * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space | ||
| 3478 | * @root - the root we're allocating for | ||
| 3479 | * @block_rsv - the block_rsv we're allocating for | ||
| 3480 | * @orig_bytes - the number of bytes we want | ||
| 3481 | * @flush - whether or not we can flush to make our reservation | ||
| 3482 | * | ||
| 3483 | * This will reserve orig_bytes number of bytes from the space info associated | ||
| 3484 | * with the block_rsv. If there is not enough space it will make an attempt to | ||
| 3485 | * flush out space to make room. It will do this by flushing delalloc if | ||
| 3486 | * possible or committing the transaction. If flush is 0 then no attempts to | ||
| 3487 | * regain reservations will be made and this will fail if there is not enough | ||
| 3488 | * space already. | ||
| 3489 | */ | ||
| 3490 | static int reserve_metadata_bytes(struct btrfs_root *root, | ||
| 3397 | struct btrfs_block_rsv *block_rsv, | 3491 | struct btrfs_block_rsv *block_rsv, |
| 3398 | u64 orig_bytes, int flush) | 3492 | u64 orig_bytes, int flush) |
| 3399 | { | 3493 | { |
| 3400 | struct btrfs_space_info *space_info = block_rsv->space_info; | 3494 | struct btrfs_space_info *space_info = block_rsv->space_info; |
| 3401 | u64 unused; | 3495 | u64 used; |
| 3402 | u64 num_bytes = orig_bytes; | 3496 | u64 num_bytes = orig_bytes; |
| 3403 | int retries = 0; | 3497 | int retries = 0; |
| 3404 | int ret = 0; | 3498 | int ret = 0; |
| 3405 | bool committed = false; | 3499 | bool committed = false; |
| 3406 | bool flushing = false; | 3500 | bool flushing = false; |
| 3501 | bool wait_ordered = false; | ||
| 3407 | 3502 | ||
| 3408 | again: | 3503 | again: |
| 3409 | ret = 0; | 3504 | ret = 0; |
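
may_commit_transaction(), introduced above, only pays for a commit when it can plausibly return space: either enough bytes are pinned, or the delayed-insertion reserve shares this space_info and is large enough to cover the request. Stripped of the spinlocks, the decision is a small predicate (a toy mirror, with the counters passed in rather than sampled under the locks):

    #include <stdint.h>

    static int commit_would_help(uint64_t bytes_pinned,
                                 uint64_t delayed_rsv_size,
                                 int same_space_info, uint64_t bytes)
    {
            if (bytes_pinned >= bytes)
                    return 1;       /* the commit will unpin enough */
            if (!same_space_info)
                    return 0;       /* the delayed rsv cannot help here */
            return delayed_rsv_size >= bytes;
    }

A forced commit (force != 0) skips the predicate entirely, which the pinned-space fast path in the following hunks relies on.
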
| @@ -3420,7 +3515,7 @@ again: | |||
| 3420 | * deadlock since we are waiting for the flusher to finish, but | 3515 | * deadlock since we are waiting for the flusher to finish, but |
| 3421 | * hold the current transaction open. | 3516 | * hold the current transaction open. |
| 3422 | */ | 3517 | */ |
| 3423 | if (trans) | 3518 | if (current->journal_info) |
| 3424 | return -EAGAIN; | 3519 | return -EAGAIN; |
| 3425 | ret = wait_event_interruptible(space_info->wait, | 3520 | ret = wait_event_interruptible(space_info->wait, |
| 3426 | !space_info->flush); | 3521 | !space_info->flush); |
| @@ -3432,9 +3527,9 @@ again: | |||
| 3432 | } | 3527 | } |
| 3433 | 3528 | ||
| 3434 | ret = -ENOSPC; | 3529 | ret = -ENOSPC; |
| 3435 | unused = space_info->bytes_used + space_info->bytes_reserved + | 3530 | used = space_info->bytes_used + space_info->bytes_reserved + |
| 3436 | space_info->bytes_pinned + space_info->bytes_readonly + | 3531 | space_info->bytes_pinned + space_info->bytes_readonly + |
| 3437 | space_info->bytes_may_use; | 3532 | space_info->bytes_may_use; |
| 3438 | 3533 | ||
| 3439 | /* | 3534 | /* |
| 3440 | * The idea here is that we've not already over-reserved the block group | 3535 | * The idea here is that we've not already over-reserved the block group |
| @@ -3443,10 +3538,9 @@ again: | |||
| 3443 | * let's start flushing stuff first and then come back and try to make | 3538 | * let's start flushing stuff first and then come back and try to make |
| 3444 | * our reservation. | 3539 | * our reservation. |
| 3445 | */ | 3540 | */ |
| 3446 | if (unused <= space_info->total_bytes) { | 3541 | if (used <= space_info->total_bytes) { |
| 3447 | unused = space_info->total_bytes - unused; | 3542 | if (used + orig_bytes <= space_info->total_bytes) { |
| 3448 | if (unused >= num_bytes) { | 3543 | space_info->bytes_may_use += orig_bytes; |
| 3449 | space_info->bytes_reserved += orig_bytes; | ||
| 3450 | ret = 0; | 3544 | ret = 0; |
| 3451 | } else { | 3545 | } else { |
| 3452 | /* | 3546 | /* |
| @@ -3462,10 +3556,64 @@ again: | |||
| 3462 | * amount plus the amount of bytes that we need for this | 3556 | * amount plus the amount of bytes that we need for this |
| 3463 | * reservation. | 3557 | * reservation. |
| 3464 | */ | 3558 | */ |
| 3465 | num_bytes = unused - space_info->total_bytes + | 3559 | wait_ordered = true; |
| 3560 | num_bytes = used - space_info->total_bytes + | ||
| 3466 | (orig_bytes * (retries + 1)); | 3561 | (orig_bytes * (retries + 1)); |
| 3467 | } | 3562 | } |
| 3468 | 3563 | ||
| 3564 | if (ret) { | ||
| 3565 | u64 profile = btrfs_get_alloc_profile(root, 0); | ||
| 3566 | u64 avail; | ||
| 3567 | |||
| 3568 | /* | ||
| 3569 | * If we have a lot of space that's pinned, don't bother doing | ||
| 3570 | * the overcommit dance yet and just commit the transaction. | ||
| 3571 | */ | ||
| 3572 | avail = (space_info->total_bytes - space_info->bytes_used) * 8; | ||
| 3573 | do_div(avail, 10); | ||
| 3574 | if (space_info->bytes_pinned >= avail && flush && !committed) { | ||
| 3575 | space_info->flush = 1; | ||
| 3576 | flushing = true; | ||
| 3577 | spin_unlock(&space_info->lock); | ||
| 3578 | ret = may_commit_transaction(root, space_info, | ||
| 3579 | orig_bytes, 1); | ||
| 3580 | if (ret) | ||
| 3581 | goto out; | ||
| 3582 | committed = true; | ||
| 3583 | goto again; | ||
| 3584 | } | ||
| 3585 | |||
| 3586 | spin_lock(&root->fs_info->free_chunk_lock); | ||
| 3587 | avail = root->fs_info->free_chunk_space; | ||
| 3588 | |||
| 3589 | /* | ||
| 3590 | * If we have dup, raid1 or raid10 then only half of the free | ||
| 3591 | * space is actually usable. | ||
| 3592 | */ | ||
| 3593 | if (profile & (BTRFS_BLOCK_GROUP_DUP | | ||
| 3594 | BTRFS_BLOCK_GROUP_RAID1 | | ||
| 3595 | BTRFS_BLOCK_GROUP_RAID10)) | ||
| 3596 | avail >>= 1; | ||
| 3597 | |||
| 3598 | /* | ||
| 3599 | * If we aren't flushing don't let us overcommit too much, say | ||
| 3600 | * 1/8th of the space. If we can flush, let it overcommit up to | ||
| 3601 | * 1/2 of the space. | ||
| 3602 | */ | ||
| 3603 | if (flush) | ||
| 3604 | avail >>= 3; | ||
| 3605 | else | ||
| 3606 | avail >>= 1; | ||
| 3607 | spin_unlock(&root->fs_info->free_chunk_lock); | ||
| 3608 | |||
| 3609 | if (used + num_bytes < space_info->total_bytes + avail) { | ||
| 3610 | space_info->bytes_may_use += orig_bytes; | ||
| 3611 | ret = 0; | ||
| 3612 | } else { | ||
| 3613 | wait_ordered = true; | ||
| 3614 | } | ||
| 3615 | } | ||
| 3616 | |||
| 3469 | /* | 3617 | /* |
| 3470 | * Couldn't make our reservation, save our place so while we're trying | 3618 | * Couldn't make our reservation, save our place so while we're trying |
| 3471 | * to reclaim space we can actually use it instead of somebody else | 3619 | * to reclaim space we can actually use it instead of somebody else |
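
The overcommit allowance above starts from free_chunk_space, halves it when the allocation profile mirrors every block (dup, raid1, raid10), and then shifts it by a flush-dependent factor: a shift of 3 permits overcommitting by 1/8th of the remaining chunk space, a shift of 1 by half. A minimal sketch of that arithmetic with the shift passed in:

    #include <stdint.h>

    /* 'mirrored' covers the BTRFS_BLOCK_GROUP_DUP/RAID1/RAID10 profiles */
    static uint64_t overcommit_avail(uint64_t free_chunk_space,
                                     int mirrored, unsigned int shift)
    {
            uint64_t avail = free_chunk_space;

            if (mirrored)
                    avail >>= 1;    /* only half is usable capacity */

            return avail >> shift;  /* 3 -> allow 1/8th, 1 -> allow 1/2 */
    }

The reservation then succeeds when used + num_bytes stays below total_bytes + avail; otherwise the caller falls back to waiting on ordered extents.
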
| @@ -3485,7 +3633,7 @@ again: | |||
| 3485 | * We do synchronous shrinking since we don't actually unreserve | 3633 | * We do synchronous shrinking since we don't actually unreserve |
| 3486 | * metadata until after the IO is completed. | 3634 | * metadata until after the IO is completed. |
| 3487 | */ | 3635 | */ |
| 3488 | ret = shrink_delalloc(trans, root, num_bytes, 1); | 3636 | ret = shrink_delalloc(root, num_bytes, wait_ordered); |
| 3489 | if (ret < 0) | 3637 | if (ret < 0) |
| 3490 | goto out; | 3638 | goto out; |
| 3491 | 3639 | ||
| @@ -3497,35 +3645,17 @@ again: | |||
| 3497 | * so go back around and try again. | 3645 | * so go back around and try again. |
| 3498 | */ | 3646 | */ |
| 3499 | if (retries < 2) { | 3647 | if (retries < 2) { |
| 3648 | wait_ordered = true; | ||
| 3500 | retries++; | 3649 | retries++; |
| 3501 | goto again; | 3650 | goto again; |
| 3502 | } | 3651 | } |
| 3503 | 3652 | ||
| 3504 | /* | ||
| 3505 | * Not enough space to be reclaimed, don't bother committing the | ||
| 3506 | * transaction. | ||
| 3507 | */ | ||
| 3508 | spin_lock(&space_info->lock); | ||
| 3509 | if (space_info->bytes_pinned < orig_bytes) | ||
| 3510 | ret = -ENOSPC; | ||
| 3511 | spin_unlock(&space_info->lock); | ||
| 3512 | if (ret) | ||
| 3513 | goto out; | ||
| 3514 | |||
| 3515 | ret = -EAGAIN; | ||
| 3516 | if (trans) | ||
| 3517 | goto out; | ||
| 3518 | |||
| 3519 | ret = -ENOSPC; | 3653 | ret = -ENOSPC; |
| 3520 | if (committed) | 3654 | if (committed) |
| 3521 | goto out; | 3655 | goto out; |
| 3522 | 3656 | ||
| 3523 | trans = btrfs_join_transaction(root); | 3657 | ret = may_commit_transaction(root, space_info, orig_bytes, 0); |
| 3524 | if (IS_ERR(trans)) | ||
| 3525 | goto out; | ||
| 3526 | ret = btrfs_commit_transaction(trans, root); | ||
| 3527 | if (!ret) { | 3658 | if (!ret) { |
| 3528 | trans = NULL; | ||
| 3529 | committed = true; | 3659 | committed = true; |
| 3530 | goto again; | 3660 | goto again; |
| 3531 | } | 3661 | } |
| @@ -3543,10 +3673,12 @@ out: | |||
| 3543 | static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans, | 3673 | static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans, |
| 3544 | struct btrfs_root *root) | 3674 | struct btrfs_root *root) |
| 3545 | { | 3675 | { |
| 3546 | struct btrfs_block_rsv *block_rsv; | 3676 | struct btrfs_block_rsv *block_rsv = NULL; |
| 3547 | if (root->ref_cows) | 3677 | |
| 3678 | if (root->ref_cows || root == root->fs_info->csum_root) | ||
| 3548 | block_rsv = trans->block_rsv; | 3679 | block_rsv = trans->block_rsv; |
| 3549 | else | 3680 | |
| 3681 | if (!block_rsv) | ||
| 3550 | block_rsv = root->block_rsv; | 3682 | block_rsv = root->block_rsv; |
| 3551 | 3683 | ||
| 3552 | if (!block_rsv) | 3684 | if (!block_rsv) |
| @@ -3617,7 +3749,7 @@ static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, | |||
| 3617 | } | 3749 | } |
| 3618 | if (num_bytes) { | 3750 | if (num_bytes) { |
| 3619 | spin_lock(&space_info->lock); | 3751 | spin_lock(&space_info->lock); |
| 3620 | space_info->bytes_reserved -= num_bytes; | 3752 | space_info->bytes_may_use -= num_bytes; |
| 3621 | space_info->reservation_progress++; | 3753 | space_info->reservation_progress++; |
| 3622 | spin_unlock(&space_info->lock); | 3754 | spin_unlock(&space_info->lock); |
| 3623 | } | 3755 | } |
| @@ -3641,9 +3773,6 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv) | |||
| 3641 | { | 3773 | { |
| 3642 | memset(rsv, 0, sizeof(*rsv)); | 3774 | memset(rsv, 0, sizeof(*rsv)); |
| 3643 | spin_lock_init(&rsv->lock); | 3775 | spin_lock_init(&rsv->lock); |
| 3644 | atomic_set(&rsv->usage, 1); | ||
| 3645 | rsv->priority = 6; | ||
| 3646 | INIT_LIST_HEAD(&rsv->list); | ||
| 3647 | } | 3776 | } |
| 3648 | 3777 | ||
| 3649 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) | 3778 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) |
| @@ -3664,38 +3793,38 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) | |||
| 3664 | void btrfs_free_block_rsv(struct btrfs_root *root, | 3793 | void btrfs_free_block_rsv(struct btrfs_root *root, |
| 3665 | struct btrfs_block_rsv *rsv) | 3794 | struct btrfs_block_rsv *rsv) |
| 3666 | { | 3795 | { |
| 3667 | if (rsv && atomic_dec_and_test(&rsv->usage)) { | 3796 | btrfs_block_rsv_release(root, rsv, (u64)-1); |
| 3668 | btrfs_block_rsv_release(root, rsv, (u64)-1); | 3797 | kfree(rsv); |
| 3669 | if (!rsv->durable) | ||
| 3670 | kfree(rsv); | ||
| 3671 | } | ||
| 3672 | } | 3798 | } |
| 3673 | 3799 | ||
| 3674 | /* | 3800 | int btrfs_block_rsv_add(struct btrfs_root *root, |
| 3675 | * make the block_rsv struct be able to capture freed space. | 3801 | struct btrfs_block_rsv *block_rsv, |
| 3676 | * the captured space will re-add to the the block_rsv struct | 3802 | u64 num_bytes) |
| 3677 | * after transaction commit | ||
| 3678 | */ | ||
| 3679 | void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, | ||
| 3680 | struct btrfs_block_rsv *block_rsv) | ||
| 3681 | { | 3803 | { |
| 3682 | block_rsv->durable = 1; | 3804 | int ret; |
| 3683 | mutex_lock(&fs_info->durable_block_rsv_mutex); | 3805 | |
| 3684 | list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list); | 3806 | if (num_bytes == 0) |
| 3685 | mutex_unlock(&fs_info->durable_block_rsv_mutex); | 3807 | return 0; |
| 3808 | |||
| 3809 | ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1); | ||
| 3810 | if (!ret) { | ||
| 3811 | block_rsv_add_bytes(block_rsv, num_bytes, 1); | ||
| 3812 | return 0; | ||
| 3813 | } | ||
| 3814 | |||
| 3815 | return ret; | ||
| 3686 | } | 3816 | } |
| 3687 | 3817 | ||
| 3688 | int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, | 3818 | int btrfs_block_rsv_add_noflush(struct btrfs_root *root, |
| 3689 | struct btrfs_root *root, | 3819 | struct btrfs_block_rsv *block_rsv, |
| 3690 | struct btrfs_block_rsv *block_rsv, | 3820 | u64 num_bytes) |
| 3691 | u64 num_bytes) | ||
| 3692 | { | 3821 | { |
| 3693 | int ret; | 3822 | int ret; |
| 3694 | 3823 | ||
| 3695 | if (num_bytes == 0) | 3824 | if (num_bytes == 0) |
| 3696 | return 0; | 3825 | return 0; |
| 3697 | 3826 | ||
| 3698 | ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1); | 3827 | ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 0); |
| 3699 | if (!ret) { | 3828 | if (!ret) { |
| 3700 | block_rsv_add_bytes(block_rsv, num_bytes, 1); | 3829 | block_rsv_add_bytes(block_rsv, num_bytes, 1); |
| 3701 | return 0; | 3830 | return 0; |
| @@ -3704,55 +3833,52 @@ int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, | |||
| 3704 | return ret; | 3833 | return ret; |
| 3705 | } | 3834 | } |
| 3706 | 3835 | ||
| 3707 | int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | 3836 | int btrfs_block_rsv_check(struct btrfs_root *root, |
| 3708 | struct btrfs_root *root, | 3837 | struct btrfs_block_rsv *block_rsv, int min_factor) |
| 3709 | struct btrfs_block_rsv *block_rsv, | ||
| 3710 | u64 min_reserved, int min_factor) | ||
| 3711 | { | 3838 | { |
| 3712 | u64 num_bytes = 0; | 3839 | u64 num_bytes = 0; |
| 3713 | int commit_trans = 0; | ||
| 3714 | int ret = -ENOSPC; | 3840 | int ret = -ENOSPC; |
| 3715 | 3841 | ||
| 3716 | if (!block_rsv) | 3842 | if (!block_rsv) |
| 3717 | return 0; | 3843 | return 0; |
| 3718 | 3844 | ||
| 3719 | spin_lock(&block_rsv->lock); | 3845 | spin_lock(&block_rsv->lock); |
| 3720 | if (min_factor > 0) | 3846 | num_bytes = div_factor(block_rsv->size, min_factor); |
| 3721 | num_bytes = div_factor(block_rsv->size, min_factor); | 3847 | if (block_rsv->reserved >= num_bytes) |
| 3722 | if (min_reserved > num_bytes) | 3848 | ret = 0; |
| 3723 | num_bytes = min_reserved; | 3849 | spin_unlock(&block_rsv->lock); |
| 3724 | 3850 | ||
| 3725 | if (block_rsv->reserved >= num_bytes) { | 3851 | return ret; |
| 3852 | } | ||
| 3853 | |||
| 3854 | int btrfs_block_rsv_refill(struct btrfs_root *root, | ||
| 3855 | struct btrfs_block_rsv *block_rsv, | ||
| 3856 | u64 min_reserved) | ||
| 3857 | { | ||
| 3858 | u64 num_bytes = 0; | ||
| 3859 | int ret = -ENOSPC; | ||
| 3860 | |||
| 3861 | if (!block_rsv) | ||
| 3862 | return 0; | ||
| 3863 | |||
| 3864 | spin_lock(&block_rsv->lock); | ||
| 3865 | num_bytes = min_reserved; | ||
| 3866 | if (block_rsv->reserved >= num_bytes) | ||
| 3726 | ret = 0; | 3867 | ret = 0; |
| 3727 | } else { | 3868 | else |
| 3728 | num_bytes -= block_rsv->reserved; | 3869 | num_bytes -= block_rsv->reserved; |
| 3729 | if (block_rsv->durable && | ||
| 3730 | block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes) | ||
| 3731 | commit_trans = 1; | ||
| 3732 | } | ||
| 3733 | spin_unlock(&block_rsv->lock); | 3870 | spin_unlock(&block_rsv->lock); |
| 3871 | |||
| 3734 | if (!ret) | 3872 | if (!ret) |
| 3735 | return 0; | 3873 | return 0; |
| 3736 | 3874 | ||
| 3737 | if (block_rsv->refill_used) { | 3875 | ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1); |
| 3738 | ret = reserve_metadata_bytes(trans, root, block_rsv, | 3876 | if (!ret) { |
| 3739 | num_bytes, 0); | 3877 | block_rsv_add_bytes(block_rsv, num_bytes, 0); |
| 3740 | if (!ret) { | ||
| 3741 | block_rsv_add_bytes(block_rsv, num_bytes, 0); | ||
| 3742 | return 0; | ||
| 3743 | } | ||
| 3744 | } | ||
| 3745 | |||
| 3746 | if (commit_trans) { | ||
| 3747 | if (trans) | ||
| 3748 | return -EAGAIN; | ||
| 3749 | trans = btrfs_join_transaction(root); | ||
| 3750 | BUG_ON(IS_ERR(trans)); | ||
| 3751 | ret = btrfs_commit_transaction(trans, root); | ||
| 3752 | return 0; | 3878 | return 0; |
| 3753 | } | 3879 | } |
| 3754 | 3880 | ||
| 3755 | return -ENOSPC; | 3881 | return ret; |
| 3756 | } | 3882 | } |
| 3757 | 3883 | ||
| 3758 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | 3884 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, |
| @@ -3784,7 +3910,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) | |||
| 3784 | u64 num_bytes; | 3910 | u64 num_bytes; |
| 3785 | u64 meta_used; | 3911 | u64 meta_used; |
| 3786 | u64 data_used; | 3912 | u64 data_used; |
| 3787 | int csum_size = btrfs_super_csum_size(&fs_info->super_copy); | 3913 | int csum_size = btrfs_super_csum_size(fs_info->super_copy); |
| 3788 | 3914 | ||
| 3789 | sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); | 3915 | sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); |
| 3790 | spin_lock(&sinfo->lock); | 3916 | spin_lock(&sinfo->lock); |
| @@ -3828,12 +3954,12 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
| 3828 | if (sinfo->total_bytes > num_bytes) { | 3954 | if (sinfo->total_bytes > num_bytes) { |
| 3829 | num_bytes = sinfo->total_bytes - num_bytes; | 3955 | num_bytes = sinfo->total_bytes - num_bytes; |
| 3830 | block_rsv->reserved += num_bytes; | 3956 | block_rsv->reserved += num_bytes; |
| 3831 | sinfo->bytes_reserved += num_bytes; | 3957 | sinfo->bytes_may_use += num_bytes; |
| 3832 | } | 3958 | } |
| 3833 | 3959 | ||
| 3834 | if (block_rsv->reserved >= block_rsv->size) { | 3960 | if (block_rsv->reserved >= block_rsv->size) { |
| 3835 | num_bytes = block_rsv->reserved - block_rsv->size; | 3961 | num_bytes = block_rsv->reserved - block_rsv->size; |
| 3836 | sinfo->bytes_reserved -= num_bytes; | 3962 | sinfo->bytes_may_use -= num_bytes; |
| 3837 | sinfo->reservation_progress++; | 3963 | sinfo->reservation_progress++; |
| 3838 | block_rsv->reserved = block_rsv->size; | 3964 | block_rsv->reserved = block_rsv->size; |
| 3839 | block_rsv->full = 1; | 3965 | block_rsv->full = 1; |
| @@ -3849,16 +3975,13 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
| 3849 | 3975 | ||
| 3850 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); | 3976 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); |
| 3851 | fs_info->chunk_block_rsv.space_info = space_info; | 3977 | fs_info->chunk_block_rsv.space_info = space_info; |
| 3852 | fs_info->chunk_block_rsv.priority = 10; | ||
| 3853 | 3978 | ||
| 3854 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); | 3979 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); |
| 3855 | fs_info->global_block_rsv.space_info = space_info; | 3980 | fs_info->global_block_rsv.space_info = space_info; |
| 3856 | fs_info->global_block_rsv.priority = 10; | ||
| 3857 | fs_info->global_block_rsv.refill_used = 1; | ||
| 3858 | fs_info->delalloc_block_rsv.space_info = space_info; | 3981 | fs_info->delalloc_block_rsv.space_info = space_info; |
| 3859 | fs_info->trans_block_rsv.space_info = space_info; | 3982 | fs_info->trans_block_rsv.space_info = space_info; |
| 3860 | fs_info->empty_block_rsv.space_info = space_info; | 3983 | fs_info->empty_block_rsv.space_info = space_info; |
| 3861 | fs_info->empty_block_rsv.priority = 10; | 3984 | fs_info->delayed_block_rsv.space_info = space_info; |
| 3862 | 3985 | ||
| 3863 | fs_info->extent_root->block_rsv = &fs_info->global_block_rsv; | 3986 | fs_info->extent_root->block_rsv = &fs_info->global_block_rsv; |
| 3864 | fs_info->csum_root->block_rsv = &fs_info->global_block_rsv; | 3987 | fs_info->csum_root->block_rsv = &fs_info->global_block_rsv; |
| @@ -3866,10 +3989,6 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
| 3866 | fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; | 3989 | fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; |
| 3867 | fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; | 3990 | fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; |
| 3868 | 3991 | ||
| 3869 | btrfs_add_durable_block_rsv(fs_info, &fs_info->global_block_rsv); | ||
| 3870 | |||
| 3871 | btrfs_add_durable_block_rsv(fs_info, &fs_info->delalloc_block_rsv); | ||
| 3872 | |||
| 3873 | update_global_block_rsv(fs_info); | 3992 | update_global_block_rsv(fs_info); |
| 3874 | } | 3993 | } |
| 3875 | 3994 | ||
| @@ -3882,37 +4001,8 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
| 3882 | WARN_ON(fs_info->trans_block_rsv.reserved > 0); | 4001 | WARN_ON(fs_info->trans_block_rsv.reserved > 0); |
| 3883 | WARN_ON(fs_info->chunk_block_rsv.size > 0); | 4002 | WARN_ON(fs_info->chunk_block_rsv.size > 0); |
| 3884 | WARN_ON(fs_info->chunk_block_rsv.reserved > 0); | 4003 | WARN_ON(fs_info->chunk_block_rsv.reserved > 0); |
| 3885 | } | 4004 | WARN_ON(fs_info->delayed_block_rsv.size > 0); |
| 3886 | 4005 | WARN_ON(fs_info->delayed_block_rsv.reserved > 0); |
| 3887 | int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, | ||
| 3888 | struct btrfs_root *root, | ||
| 3889 | struct btrfs_block_rsv *rsv) | ||
| 3890 | { | ||
| 3891 | struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv; | ||
| 3892 | u64 num_bytes; | ||
| 3893 | int ret; | ||
| 3894 | |||
| 3895 | /* | ||
| 3896 | * Truncate should be freeing data, but give us 2 items just in case it | ||
| 3897 | * needs to use some space. We may want to be smarter about this in the | ||
| 3898 | * future. | ||
| 3899 | */ | ||
| 3900 | num_bytes = btrfs_calc_trans_metadata_size(root, 2); | ||
| 3901 | |||
| 3902 | /* We already have enough bytes, just return */ | ||
| 3903 | if (rsv->reserved >= num_bytes) | ||
| 3904 | return 0; | ||
| 3905 | |||
| 3906 | num_bytes -= rsv->reserved; | ||
| 3907 | |||
| 3908 | /* | ||
| 3909 | * You should have reserved enough space before hand to do this, so this | ||
| 3910 | * should not fail. | ||
| 3911 | */ | ||
| 3912 | ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes); | ||
| 3913 | BUG_ON(ret); | ||
| 3914 | |||
| 3915 | return 0; | ||
| 3916 | } | 4006 | } |
| 3917 | 4007 | ||
| 3918 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, | 4008 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, |
| @@ -3921,9 +4011,7 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, | |||
| 3921 | if (!trans->bytes_reserved) | 4011 | if (!trans->bytes_reserved) |
| 3922 | return; | 4012 | return; |
| 3923 | 4013 | ||
| 3924 | BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv); | 4014 | btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved); |
| 3925 | btrfs_block_rsv_release(root, trans->block_rsv, | ||
| 3926 | trans->bytes_reserved); | ||
| 3927 | trans->bytes_reserved = 0; | 4015 | trans->bytes_reserved = 0; |
| 3928 | } | 4016 | } |
| 3929 | 4017 | ||
| @@ -3965,11 +4053,19 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, | |||
| 3965 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | 4053 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); |
| 3966 | } | 4054 | } |
| 3967 | 4055 | ||
| 4056 | /** | ||
| 4057 | * drop_outstanding_extent - drop an outstanding extent | ||
| 4058 | * @inode: the inode we're dropping the extent for | ||
| 4059 | * | ||
| 4060 | * This is called when we are freeing up an outstanding extent, either called | ||
| 4061 | * after an error or after an extent is written. This will return the number of | ||
| 4062 | * reserved extents that need to be freed. This must be called with | ||
| 4063 | * BTRFS_I(inode)->lock held. | ||
| 4064 | */ | ||
| 3968 | static unsigned drop_outstanding_extent(struct inode *inode) | 4065 | static unsigned drop_outstanding_extent(struct inode *inode) |
| 3969 | { | 4066 | { |
| 3970 | unsigned dropped_extents = 0; | 4067 | unsigned dropped_extents = 0; |
| 3971 | 4068 | ||
| 3972 | spin_lock(&BTRFS_I(inode)->lock); | ||
| 3973 | BUG_ON(!BTRFS_I(inode)->outstanding_extents); | 4069 | BUG_ON(!BTRFS_I(inode)->outstanding_extents); |
| 3974 | BTRFS_I(inode)->outstanding_extents--; | 4070 | BTRFS_I(inode)->outstanding_extents--; |
| 3975 | 4071 | ||
| @@ -3979,19 +4075,70 @@ static unsigned drop_outstanding_extent(struct inode *inode) | |||
| 3979 | */ | 4075 | */ |
| 3980 | if (BTRFS_I(inode)->outstanding_extents >= | 4076 | if (BTRFS_I(inode)->outstanding_extents >= |
| 3981 | BTRFS_I(inode)->reserved_extents) | 4077 | BTRFS_I(inode)->reserved_extents) |
| 3982 | goto out; | 4078 | return 0; |
| 3983 | 4079 | ||
| 3984 | dropped_extents = BTRFS_I(inode)->reserved_extents - | 4080 | dropped_extents = BTRFS_I(inode)->reserved_extents - |
| 3985 | BTRFS_I(inode)->outstanding_extents; | 4081 | BTRFS_I(inode)->outstanding_extents; |
| 3986 | BTRFS_I(inode)->reserved_extents -= dropped_extents; | 4082 | BTRFS_I(inode)->reserved_extents -= dropped_extents; |
| 3987 | out: | ||
| 3988 | spin_unlock(&BTRFS_I(inode)->lock); | ||
| 3989 | return dropped_extents; | 4083 | return dropped_extents; |
| 3990 | } | 4084 | } |
| 3991 | 4085 | ||
| 3992 | static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) | 4086 | /** |
| 4087 | * calc_csum_metadata_size - return the amount of metadata space that must be | ||
| 4088 | * reserved/freed for the given bytes. | ||
| 4089 | * @inode: the inode we're manipulating | ||
| 4090 | * @num_bytes: the number of bytes in question | ||
| 4091 | * @reserve: 1 if we are reserving space, 0 if we are freeing space | ||
| 4092 | * | ||
| 4093 | * This adjusts the number of csum_bytes in the inode and then returns the | ||
| 4094 | * correct amount of metadata that must either be reserved or freed. We | ||
| 4095 | * calculate how many checksums we can fit into one leaf and then divide the | ||
| 4096 | * number of bytes that will need to be checksummed by this value to figure out | ||
| 4097 | * how many checksums will be required. If we are adding bytes then the number | ||
| 4098 | * may go up and we will return the number of additional bytes that must be | ||
| 4099 | * reserved. If it is going down we will return the number of bytes that must | ||
| 4100 | * be freed. | ||
| 4101 | * | ||
| 4102 | * This must be called with BTRFS_I(inode)->lock held. | ||
| 4103 | */ | ||
| 4104 | static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes, | ||
| 4105 | int reserve) | ||
| 3993 | { | 4106 | { |
| 3994 | return num_bytes >>= 3; | 4107 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 4108 | u64 csum_size; | ||
| 4109 | int num_csums_per_leaf; | ||
| 4110 | int num_csums; | ||
| 4111 | int old_csums; | ||
| 4112 | |||
| 4113 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM && | ||
| 4114 | BTRFS_I(inode)->csum_bytes == 0) | ||
| 4115 | return 0; | ||
| 4116 | |||
| 4117 | old_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize); | ||
| 4118 | if (reserve) | ||
| 4119 | BTRFS_I(inode)->csum_bytes += num_bytes; | ||
| 4120 | else | ||
| 4121 | BTRFS_I(inode)->csum_bytes -= num_bytes; | ||
| 4122 | csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item); | ||
| 4123 | num_csums_per_leaf = (int)div64_u64(csum_size, | ||
| 4124 | sizeof(struct btrfs_csum_item) + | ||
| 4125 | sizeof(struct btrfs_disk_key)); | ||
| 4126 | num_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize); | ||
| 4127 | num_csums = num_csums + num_csums_per_leaf - 1; | ||
| 4128 | num_csums = num_csums / num_csums_per_leaf; | ||
| 4129 | |||
| 4130 | old_csums = old_csums + num_csums_per_leaf - 1; | ||
| 4131 | old_csums = old_csums / num_csums_per_leaf; | ||
| 4132 | |||
| 4133 | /* No change, no need to reserve more */ | ||
| 4134 | if (old_csums == num_csums) | ||
| 4135 | return 0; | ||
| 4136 | |||
| 4137 | if (reserve) | ||
| 4138 | return btrfs_calc_trans_metadata_size(root, | ||
| 4139 | num_csums - old_csums); | ||
| 4140 | |||
| 4141 | return btrfs_calc_trans_metadata_size(root, old_csums - num_csums); | ||
| 3995 | } | 4142 | } |
| 3996 | 4143 | ||
| 3997 | int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | 4144 | int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) |
| @@ -4000,9 +4147,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
| 4000 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; | 4147 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; |
| 4001 | u64 to_reserve = 0; | 4148 | u64 to_reserve = 0; |
| 4002 | unsigned nr_extents = 0; | 4149 | unsigned nr_extents = 0; |
| 4150 | int flush = 1; | ||
| 4003 | int ret; | 4151 | int ret; |
| 4004 | 4152 | ||
| 4005 | if (btrfs_transaction_in_commit(root->fs_info)) | 4153 | if (btrfs_is_free_space_inode(root, inode)) |
| 4154 | flush = 0; | ||
| 4155 | |||
| 4156 | if (flush && btrfs_transaction_in_commit(root->fs_info)) | ||
| 4006 | schedule_timeout(1); | 4157 | schedule_timeout(1); |
| 4007 | 4158 | ||
| 4008 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 4159 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
| @@ -4018,18 +4169,29 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
| 4018 | 4169 | ||
| 4019 | to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); | 4170 | to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); |
| 4020 | } | 4171 | } |
| 4172 | to_reserve += calc_csum_metadata_size(inode, num_bytes, 1); | ||
| 4021 | spin_unlock(&BTRFS_I(inode)->lock); | 4173 | spin_unlock(&BTRFS_I(inode)->lock); |
| 4022 | 4174 | ||
| 4023 | to_reserve += calc_csum_metadata_size(inode, num_bytes); | 4175 | ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); |
| 4024 | ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); | ||
| 4025 | if (ret) { | 4176 | if (ret) { |
| 4177 | u64 to_free = 0; | ||
| 4026 | unsigned dropped; | 4178 | unsigned dropped; |
| 4179 | |||
| 4180 | spin_lock(&BTRFS_I(inode)->lock); | ||
| 4181 | dropped = drop_outstanding_extent(inode); | ||
| 4182 | to_free = calc_csum_metadata_size(inode, num_bytes, 0); | ||
| 4183 | spin_unlock(&BTRFS_I(inode)->lock); | ||
| 4184 | to_free += btrfs_calc_trans_metadata_size(root, dropped); | ||
| 4185 | |||
| 4027 | /* | 4186 | /* |
| 4028 | * We don't need the return value since our reservation failed, | 4187 | * Somebody could have come in and twiddled with the |
| 4029 | * we just need to clean up our counter. | 4188 | * reservation, so if we have to free more than we would have |
| 4189 | * reserved from this reservation go ahead and release those | ||
| 4190 | * bytes. | ||
| 4030 | */ | 4191 | */ |
| 4031 | dropped = drop_outstanding_extent(inode); | 4192 | to_free -= to_reserve; |
| 4032 | WARN_ON(dropped > 1); | 4193 | if (to_free) |
| 4194 | btrfs_block_rsv_release(root, block_rsv, to_free); | ||
| 4033 | return ret; | 4195 | return ret; |
| 4034 | } | 4196 | } |
| 4035 | 4197 | ||
| @@ -4038,6 +4200,15 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
| 4038 | return 0; | 4200 | return 0; |
| 4039 | } | 4201 | } |
| 4040 | 4202 | ||
| 4203 | /** | ||
| 4204 | * btrfs_delalloc_release_metadata - release a metadata reservation for an inode | ||
| 4205 | * @inode: the inode to release the reservation for | ||
| 4206 | * @num_bytes: the number of bytes we're releasing | ||
| 4207 | * | ||
| 4208 | * This will release the metadata reservation for an inode. This can be called | ||
| 4209 | * once we complete IO for a given set of bytes to release their metadata | ||
| 4210 | * reservations. | ||
| 4211 | */ | ||
| 4041 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | 4212 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) |
| 4042 | { | 4213 | { |
| 4043 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4214 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| @@ -4045,9 +4216,11 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | |||
| 4045 | unsigned dropped; | 4216 | unsigned dropped; |
| 4046 | 4217 | ||
| 4047 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 4218 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
| 4219 | spin_lock(&BTRFS_I(inode)->lock); | ||
| 4048 | dropped = drop_outstanding_extent(inode); | 4220 | dropped = drop_outstanding_extent(inode); |
| 4049 | 4221 | ||
| 4050 | to_free = calc_csum_metadata_size(inode, num_bytes); | 4222 | to_free = calc_csum_metadata_size(inode, num_bytes, 0); |
| 4223 | spin_unlock(&BTRFS_I(inode)->lock); | ||
| 4051 | if (dropped > 0) | 4224 | if (dropped > 0) |
| 4052 | to_free += btrfs_calc_trans_metadata_size(root, dropped); | 4225 | to_free += btrfs_calc_trans_metadata_size(root, dropped); |
| 4053 | 4226 | ||
| @@ -4055,6 +4228,21 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | |||
| 4055 | to_free); | 4228 | to_free); |
| 4056 | } | 4229 | } |
| 4057 | 4230 | ||
| 4231 | /** | ||
| 4232 | * btrfs_delalloc_reserve_space - reserve data and metadata space for delalloc | ||
| 4233 | * @inode: inode we're writing to | ||
| 4234 | * @num_bytes: the number of bytes we want to allocate | ||
| 4235 | * | ||
| 4236 | * This will do the following things | ||
| 4237 | * | ||
| 4238 | * o reserve space in the data space info for num_bytes | ||
| 4239 | * o reserve space in the metadata space info based on number of outstanding | ||
| 4240 | * extents and how much csums will be needed | ||
| 4241 | * o add to the inode's ->delalloc_bytes | ||
| 4242 | * o add it to the fs_info's delalloc inodes list. | ||
| 4243 | * | ||
| 4244 | * This will return 0 for success and -ENOSPC if there is no space left. | ||
| 4245 | */ | ||
| 4058 | int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes) | 4246 | int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes) |
| 4059 | { | 4247 | { |
| 4060 | int ret; | 4248 | int ret; |
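
The bullet list above maps onto a two-step reservation with rollback: take the data space first, then the metadata, and hand the data space back if the metadata side fails. A sketch under that assumption (the function body is elided from this hunk), built only from helpers that appear elsewhere in this diff and assuming the btrfs headers:

    static int reserve_space_sketch(struct inode *inode, u64 num_bytes)
    {
            int ret;

            /* data first: bytes_may_use in the data space_info */
            ret = btrfs_check_data_free_space(inode, num_bytes);
            if (ret)
                    return ret;

            /* then metadata: outstanding extents plus csum items */
            ret = btrfs_delalloc_reserve_metadata(inode, num_bytes);
            if (ret) {
                    btrfs_free_reserved_data_space(inode, num_bytes);
                    return ret;
            }
            return 0;
    }
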
| @@ -4072,6 +4260,19 @@ int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes) | |||
| 4072 | return 0; | 4260 | return 0; |
| 4073 | } | 4261 | } |
| 4074 | 4262 | ||
| 4263 | /** | ||
| 4264 | * btrfs_delalloc_release_space - release data and metadata space for delalloc | ||
| 4265 | * @inode: inode we're releasing space for | ||
| 4266 | * @num_bytes: the number of bytes we want to free up | ||
| 4267 | * | ||
| 4268 | * This must be matched with a call to btrfs_delalloc_reserve_space. This is | ||
| 4269 | * called in the case that we don't need the metadata AND data reservations | ||
| 4270 | * anymore, such as when there is an error or we insert an inline extent. | ||
| 4271 | * | ||
| 4272 | * This function will release the metadata space that was not used and will | ||
| 4273 | * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes | ||
| 4274 | * list if there are no delalloc bytes left. | ||
| 4275 | */ | ||
| 4075 | void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes) | 4276 | void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes) |
| 4076 | { | 4277 | { |
| 4077 | btrfs_delalloc_release_metadata(inode, num_bytes); | 4278 | btrfs_delalloc_release_metadata(inode, num_bytes); |
| @@ -4091,12 +4292,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
| 4091 | 4292 | ||
| 4092 | /* block accounting for super block */ | 4293 | /* block accounting for super block */ |
| 4093 | spin_lock(&info->delalloc_lock); | 4294 | spin_lock(&info->delalloc_lock); |
| 4094 | old_val = btrfs_super_bytes_used(&info->super_copy); | 4295 | old_val = btrfs_super_bytes_used(info->super_copy); |
| 4095 | if (alloc) | 4296 | if (alloc) |
| 4096 | old_val += num_bytes; | 4297 | old_val += num_bytes; |
| 4097 | else | 4298 | else |
| 4098 | old_val -= num_bytes; | 4299 | old_val -= num_bytes; |
| 4099 | btrfs_set_super_bytes_used(&info->super_copy, old_val); | 4300 | btrfs_set_super_bytes_used(info->super_copy, old_val); |
| 4100 | spin_unlock(&info->delalloc_lock); | 4301 | spin_unlock(&info->delalloc_lock); |
| 4101 | 4302 | ||
| 4102 | while (total) { | 4303 | while (total) { |
| @@ -4124,7 +4325,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
| 4124 | spin_lock(&cache->space_info->lock); | 4325 | spin_lock(&cache->space_info->lock); |
| 4125 | spin_lock(&cache->lock); | 4326 | spin_lock(&cache->lock); |
| 4126 | 4327 | ||
| 4127 | if (btrfs_super_cache_generation(&info->super_copy) != 0 && | 4328 | if (btrfs_test_opt(root, SPACE_CACHE) && |
| 4128 | cache->disk_cache_state < BTRFS_DC_CLEAR) | 4329 | cache->disk_cache_state < BTRFS_DC_CLEAR) |
| 4129 | cache->disk_cache_state = BTRFS_DC_CLEAR; | 4330 | cache->disk_cache_state = BTRFS_DC_CLEAR; |
| 4130 | 4331 | ||
| @@ -4136,7 +4337,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
| 4136 | btrfs_set_block_group_used(&cache->item, old_val); | 4337 | btrfs_set_block_group_used(&cache->item, old_val); |
| 4137 | cache->reserved -= num_bytes; | 4338 | cache->reserved -= num_bytes; |
| 4138 | cache->space_info->bytes_reserved -= num_bytes; | 4339 | cache->space_info->bytes_reserved -= num_bytes; |
| 4139 | cache->space_info->reservation_progress++; | ||
| 4140 | cache->space_info->bytes_used += num_bytes; | 4340 | cache->space_info->bytes_used += num_bytes; |
| 4141 | cache->space_info->disk_used += num_bytes * factor; | 4341 | cache->space_info->disk_used += num_bytes * factor; |
| 4142 | spin_unlock(&cache->lock); | 4342 | spin_unlock(&cache->lock); |
| @@ -4188,7 +4388,6 @@ static int pin_down_extent(struct btrfs_root *root, | |||
| 4188 | if (reserved) { | 4388 | if (reserved) { |
| 4189 | cache->reserved -= num_bytes; | 4389 | cache->reserved -= num_bytes; |
| 4190 | cache->space_info->bytes_reserved -= num_bytes; | 4390 | cache->space_info->bytes_reserved -= num_bytes; |
| 4191 | cache->space_info->reservation_progress++; | ||
| 4192 | } | 4391 | } |
| 4193 | spin_unlock(&cache->lock); | 4392 | spin_unlock(&cache->lock); |
| 4194 | spin_unlock(&cache->space_info->lock); | 4393 | spin_unlock(&cache->space_info->lock); |
| @@ -4216,45 +4415,82 @@ int btrfs_pin_extent(struct btrfs_root *root, | |||
| 4216 | } | 4415 | } |
| 4217 | 4416 | ||
| 4218 | /* | 4417 | /* |
| 4219 | * update size of reserved extents. this function may return -EAGAIN | 4418 | * this function must be called within transaction |
| 4220 | * if 'reserve' is true or 'sinfo' is false. | 4419 | */ |
| 4420 | int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, | ||
| 4421 | struct btrfs_root *root, | ||
| 4422 | u64 bytenr, u64 num_bytes) | ||
| 4423 | { | ||
| 4424 | struct btrfs_block_group_cache *cache; | ||
| 4425 | |||
| 4426 | cache = btrfs_lookup_block_group(root->fs_info, bytenr); | ||
| 4427 | BUG_ON(!cache); | ||
| 4428 | |||
| 4429 | /* | ||
| 4430 | * pull in the free space cache (if any) so that our pin | ||
| 4431 | * removes the free space from the cache. We have load_only set | ||
| 4432 | * to one because the slow code to read in the free extents does check | ||
| 4433 | * the pinned extents. | ||
| 4434 | */ | ||
| 4435 | cache_block_group(cache, trans, root, 1); | ||
| 4436 | |||
| 4437 | pin_down_extent(root, cache, bytenr, num_bytes, 0); | ||
| 4438 | |||
| 4439 | /* remove us from the free space cache (if we're there at all) */ | ||
| 4440 | btrfs_remove_free_space(cache, bytenr, num_bytes); | ||
| 4441 | btrfs_put_block_group(cache); | ||
| 4442 | return 0; | ||
| 4443 | } | ||
| 4444 | |||
| 4445 | /** | ||
| 4446 | * btrfs_update_reserved_bytes - update the block_group and space info counters | ||
| 4447 | * @cache: The cache we are manipulating | ||
| 4448 | * @num_bytes: The number of bytes in question | ||
| 4449 | * @reserve: One of the reservation enums | ||
| 4450 | * | ||
| 4451 | * This is called by the allocator when it reserves space, or by somebody who is | ||
| 4452 | * freeing space that was never actually used on disk. For example if you | ||
| 4453 | * reserve some space for a new leaf in transaction A and before transaction A | ||
| 4454 | * commits you free that leaf, you call this with reserve set to 0 in order to | ||
| 4455 | * clear the reservation. | ||
| 4456 | * | ||
| 4457 | * Metadata reservations should be made with RESERVE_ALLOC so we do the proper | ||
| 4458 | * ENOSPC accounting. For data we handle the reservation through clearing the | ||
| 4459 | * delalloc bits in the io_tree. We have to do this since we could end up | ||
| 4460 | * allocating less disk space for the amount of data we have reserved in the | ||
| 4461 | * case of compression. | ||
| 4462 | * | ||
| 4463 | * If this is a reservation and the block group has become read only we cannot | ||
| 4464 | * make the reservation and return -EAGAIN, otherwise this function always | ||
| 4465 | * succeeds. | ||
| 4221 | */ | 4466 | */ |
| 4222 | int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, | 4467 | static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, |
| 4223 | u64 num_bytes, int reserve, int sinfo) | 4468 | u64 num_bytes, int reserve) |
| 4224 | { | 4469 | { |
| 4470 | struct btrfs_space_info *space_info = cache->space_info; | ||
| 4225 | int ret = 0; | 4471 | int ret = 0; |
| 4226 | if (sinfo) { | 4472 | spin_lock(&space_info->lock); |
| 4227 | struct btrfs_space_info *space_info = cache->space_info; | 4473 | spin_lock(&cache->lock); |
| 4228 | spin_lock(&space_info->lock); | 4474 | if (reserve != RESERVE_FREE) { |
| 4229 | spin_lock(&cache->lock); | ||
| 4230 | if (reserve) { | ||
| 4231 | if (cache->ro) { | ||
| 4232 | ret = -EAGAIN; | ||
| 4233 | } else { | ||
| 4234 | cache->reserved += num_bytes; | ||
| 4235 | space_info->bytes_reserved += num_bytes; | ||
| 4236 | } | ||
| 4237 | } else { | ||
| 4238 | if (cache->ro) | ||
| 4239 | space_info->bytes_readonly += num_bytes; | ||
| 4240 | cache->reserved -= num_bytes; | ||
| 4241 | space_info->bytes_reserved -= num_bytes; | ||
| 4242 | space_info->reservation_progress++; | ||
| 4243 | } | ||
| 4244 | spin_unlock(&cache->lock); | ||
| 4245 | spin_unlock(&space_info->lock); | ||
| 4246 | } else { | ||
| 4247 | spin_lock(&cache->lock); | ||
| 4248 | if (cache->ro) { | 4475 | if (cache->ro) { |
| 4249 | ret = -EAGAIN; | 4476 | ret = -EAGAIN; |
| 4250 | } else { | 4477 | } else { |
| 4251 | if (reserve) | 4478 | cache->reserved += num_bytes; |
| 4252 | cache->reserved += num_bytes; | 4479 | space_info->bytes_reserved += num_bytes; |
| 4253 | else | 4480 | if (reserve == RESERVE_ALLOC) { |
| 4254 | cache->reserved -= num_bytes; | 4481 | BUG_ON(space_info->bytes_may_use < num_bytes); |
| 4482 | space_info->bytes_may_use -= num_bytes; | ||
| 4483 | } | ||
| 4255 | } | 4484 | } |
| 4256 | spin_unlock(&cache->lock); | 4485 | } else { |
| 4486 | if (cache->ro) | ||
| 4487 | space_info->bytes_readonly += num_bytes; | ||
| 4488 | cache->reserved -= num_bytes; | ||
| 4489 | space_info->bytes_reserved -= num_bytes; | ||
| 4490 | space_info->reservation_progress++; | ||
| 4257 | } | 4491 | } |
| 4492 | spin_unlock(&cache->lock); | ||
| 4493 | spin_unlock(&space_info->lock); | ||
| 4258 | return ret; | 4494 | return ret; |
| 4259 | } | 4495 | } |
| 4260 | 4496 | ||
| @@ -4320,13 +4556,8 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) | |||
| 4320 | spin_lock(&cache->lock); | 4556 | spin_lock(&cache->lock); |
| 4321 | cache->pinned -= len; | 4557 | cache->pinned -= len; |
| 4322 | cache->space_info->bytes_pinned -= len; | 4558 | cache->space_info->bytes_pinned -= len; |
| 4323 | if (cache->ro) { | 4559 | if (cache->ro) |
| 4324 | cache->space_info->bytes_readonly += len; | 4560 | cache->space_info->bytes_readonly += len; |
| 4325 | } else if (cache->reserved_pinned > 0) { | ||
| 4326 | len = min(len, cache->reserved_pinned); | ||
| 4327 | cache->reserved_pinned -= len; | ||
| 4328 | cache->space_info->bytes_reserved += len; | ||
| 4329 | } | ||
| 4330 | spin_unlock(&cache->lock); | 4561 | spin_unlock(&cache->lock); |
| 4331 | spin_unlock(&cache->space_info->lock); | 4562 | spin_unlock(&cache->space_info->lock); |
| 4332 | } | 4563 | } |
| @@ -4341,11 +4572,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
| 4341 | { | 4572 | { |
| 4342 | struct btrfs_fs_info *fs_info = root->fs_info; | 4573 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 4343 | struct extent_io_tree *unpin; | 4574 | struct extent_io_tree *unpin; |
| 4344 | struct btrfs_block_rsv *block_rsv; | ||
| 4345 | struct btrfs_block_rsv *next_rsv; | ||
| 4346 | u64 start; | 4575 | u64 start; |
| 4347 | u64 end; | 4576 | u64 end; |
| 4348 | int idx; | ||
| 4349 | int ret; | 4577 | int ret; |
| 4350 | 4578 | ||
| 4351 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) | 4579 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) |
| @@ -4368,30 +4596,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
| 4368 | cond_resched(); | 4596 | cond_resched(); |
| 4369 | } | 4597 | } |
| 4370 | 4598 | ||
| 4371 | mutex_lock(&fs_info->durable_block_rsv_mutex); | ||
| 4372 | list_for_each_entry_safe(block_rsv, next_rsv, | ||
| 4373 | &fs_info->durable_block_rsv_list, list) { | ||
| 4374 | |||
| 4375 | idx = trans->transid & 0x1; | ||
| 4376 | if (block_rsv->freed[idx] > 0) { | ||
| 4377 | block_rsv_add_bytes(block_rsv, | ||
| 4378 | block_rsv->freed[idx], 0); | ||
| 4379 | block_rsv->freed[idx] = 0; | ||
| 4380 | } | ||
| 4381 | if (atomic_read(&block_rsv->usage) == 0) { | ||
| 4382 | btrfs_block_rsv_release(root, block_rsv, (u64)-1); | ||
| 4383 | |||
| 4384 | if (block_rsv->freed[0] == 0 && | ||
| 4385 | block_rsv->freed[1] == 0) { | ||
| 4386 | list_del_init(&block_rsv->list); | ||
| 4387 | kfree(block_rsv); | ||
| 4388 | } | ||
| 4389 | } else { | ||
| 4390 | btrfs_block_rsv_release(root, block_rsv, 0); | ||
| 4391 | } | ||
| 4392 | } | ||
| 4393 | mutex_unlock(&fs_info->durable_block_rsv_mutex); | ||
| 4394 | |||
| 4395 | return 0; | 4599 | return 0; |
| 4396 | } | 4600 | } |
| 4397 | 4601 | ||
| @@ -4669,7 +4873,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
| 4669 | struct extent_buffer *buf, | 4873 | struct extent_buffer *buf, |
| 4670 | u64 parent, int last_ref) | 4874 | u64 parent, int last_ref) |
| 4671 | { | 4875 | { |
| 4672 | struct btrfs_block_rsv *block_rsv; | ||
| 4673 | struct btrfs_block_group_cache *cache = NULL; | 4876 | struct btrfs_block_group_cache *cache = NULL; |
| 4674 | int ret; | 4877 | int ret; |
| 4675 | 4878 | ||
| @@ -4684,64 +4887,24 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
| 4684 | if (!last_ref) | 4887 | if (!last_ref) |
| 4685 | return; | 4888 | return; |
| 4686 | 4889 | ||
| 4687 | block_rsv = get_block_rsv(trans, root); | ||
| 4688 | cache = btrfs_lookup_block_group(root->fs_info, buf->start); | 4890 | cache = btrfs_lookup_block_group(root->fs_info, buf->start); |
| 4689 | if (block_rsv->space_info != cache->space_info) | ||
| 4690 | goto out; | ||
| 4691 | 4891 | ||
| 4692 | if (btrfs_header_generation(buf) == trans->transid) { | 4892 | if (btrfs_header_generation(buf) == trans->transid) { |
| 4693 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | 4893 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { |
| 4694 | ret = check_ref_cleanup(trans, root, buf->start); | 4894 | ret = check_ref_cleanup(trans, root, buf->start); |
| 4695 | if (!ret) | 4895 | if (!ret) |
| 4696 | goto pin; | 4896 | goto out; |
| 4697 | } | 4897 | } |
| 4698 | 4898 | ||
| 4699 | if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | 4899 | if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { |
| 4700 | pin_down_extent(root, cache, buf->start, buf->len, 1); | 4900 | pin_down_extent(root, cache, buf->start, buf->len, 1); |
| 4701 | goto pin; | 4901 | goto out; |
| 4702 | } | 4902 | } |
| 4703 | 4903 | ||
| 4704 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); | 4904 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); |
| 4705 | 4905 | ||
| 4706 | btrfs_add_free_space(cache, buf->start, buf->len); | 4906 | btrfs_add_free_space(cache, buf->start, buf->len); |
| 4707 | ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0); | 4907 | btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE); |
| 4708 | if (ret == -EAGAIN) { | ||
| 4709 | /* block group became read-only */ | ||
| 4710 | btrfs_update_reserved_bytes(cache, buf->len, 0, 1); | ||
| 4711 | goto out; | ||
| 4712 | } | ||
| 4713 | |||
| 4714 | ret = 1; | ||
| 4715 | spin_lock(&block_rsv->lock); | ||
| 4716 | if (block_rsv->reserved < block_rsv->size) { | ||
| 4717 | block_rsv->reserved += buf->len; | ||
| 4718 | ret = 0; | ||
| 4719 | } | ||
| 4720 | spin_unlock(&block_rsv->lock); | ||
| 4721 | |||
| 4722 | if (ret) { | ||
| 4723 | spin_lock(&cache->space_info->lock); | ||
| 4724 | cache->space_info->bytes_reserved -= buf->len; | ||
| 4725 | cache->space_info->reservation_progress++; | ||
| 4726 | spin_unlock(&cache->space_info->lock); | ||
| 4727 | } | ||
| 4728 | goto out; | ||
| 4729 | } | ||
| 4730 | pin: | ||
| 4731 | if (block_rsv->durable && !cache->ro) { | ||
| 4732 | ret = 0; | ||
| 4733 | spin_lock(&cache->lock); | ||
| 4734 | if (!cache->ro) { | ||
| 4735 | cache->reserved_pinned += buf->len; | ||
| 4736 | ret = 1; | ||
| 4737 | } | ||
| 4738 | spin_unlock(&cache->lock); | ||
| 4739 | |||
| 4740 | if (ret) { | ||
| 4741 | spin_lock(&block_rsv->lock); | ||
| 4742 | block_rsv->freed[trans->transid & 0x1] += buf->len; | ||
| 4743 | spin_unlock(&block_rsv->lock); | ||
| 4744 | } | ||
| 4745 | } | 4908 | } |
| 4746 | out: | 4909 | out: |
| 4747 | /* | 4910 | /* |
| @@ -4884,10 +5047,13 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
| 4884 | int last_ptr_loop = 0; | 5047 | int last_ptr_loop = 0; |
| 4885 | int loop = 0; | 5048 | int loop = 0; |
| 4886 | int index = 0; | 5049 | int index = 0; |
| 5050 | int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ? | ||
| 5051 | RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC; | ||
| 4887 | bool found_uncached_bg = false; | 5052 | bool found_uncached_bg = false; |
| 4888 | bool failed_cluster_refill = false; | 5053 | bool failed_cluster_refill = false; |
| 4889 | bool failed_alloc = false; | 5054 | bool failed_alloc = false; |
| 4890 | bool use_cluster = true; | 5055 | bool use_cluster = true; |
| 5056 | bool have_caching_bg = false; | ||
| 4891 | u64 ideal_cache_percent = 0; | 5057 | u64 ideal_cache_percent = 0; |
| 4892 | u64 ideal_cache_offset = 0; | 5058 | u64 ideal_cache_offset = 0; |
| 4893 | 5059 | ||
| @@ -4970,6 +5136,7 @@ ideal_cache: | |||
| 4970 | } | 5136 | } |
| 4971 | } | 5137 | } |
| 4972 | search: | 5138 | search: |
| 5139 | have_caching_bg = false; | ||
| 4973 | down_read(&space_info->groups_sem); | 5140 | down_read(&space_info->groups_sem); |
| 4974 | list_for_each_entry(block_group, &space_info->block_groups[index], | 5141 | list_for_each_entry(block_group, &space_info->block_groups[index], |
| 4975 | list) { | 5142 | list) { |
| @@ -5178,6 +5345,8 @@ refill_cluster: | |||
| 5178 | failed_alloc = true; | 5345 | failed_alloc = true; |
| 5179 | goto have_block_group; | 5346 | goto have_block_group; |
| 5180 | } else if (!offset) { | 5347 | } else if (!offset) { |
| 5348 | if (!cached) | ||
| 5349 | have_caching_bg = true; | ||
| 5181 | goto loop; | 5350 | goto loop; |
| 5182 | } | 5351 | } |
| 5183 | checks: | 5352 | checks: |
| @@ -5203,8 +5372,8 @@ checks: | |||
| 5203 | search_start - offset); | 5372 | search_start - offset); |
| 5204 | BUG_ON(offset > search_start); | 5373 | BUG_ON(offset > search_start); |
| 5205 | 5374 | ||
| 5206 | ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1, | 5375 | ret = btrfs_update_reserved_bytes(block_group, num_bytes, |
| 5207 | (data & BTRFS_BLOCK_GROUP_DATA)); | 5376 | alloc_type); |
| 5208 | if (ret == -EAGAIN) { | 5377 | if (ret == -EAGAIN) { |
| 5209 | btrfs_add_free_space(block_group, offset, num_bytes); | 5378 | btrfs_add_free_space(block_group, offset, num_bytes); |
| 5210 | goto loop; | 5379 | goto loop; |
| @@ -5228,6 +5397,9 @@ loop: | |||
| 5228 | } | 5397 | } |
| 5229 | up_read(&space_info->groups_sem); | 5398 | up_read(&space_info->groups_sem); |
| 5230 | 5399 | ||
| 5400 | if (!ins->objectid && loop >= LOOP_CACHING_WAIT && have_caching_bg) | ||
| 5401 | goto search; | ||
| 5402 | |||
| 5231 | if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES) | 5403 | if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES) |
| 5232 | goto search; | 5404 | goto search; |
| 5233 | 5405 | ||
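Editor's sketch: the have_caching_bg flag added above changes the allocator's loop policy — before escalating to the next raid index, it re-walks the current block-group list as long as some group was still caching free space. A toy model of that retry shape (illustrative only; the real loop waits on caching progress instead of forcing it, as the comment in the model notes):

	#include <stdbool.h>
	#include <stdio.h>

	struct bg { bool cached; bool has_space; };

	/* Re-scan the list while any group is uncached and nothing was found;
	 * here caching "completes" between passes so the model terminates. */
	static int search(struct bg *g, int n)
	{
		bool have_caching_bg;

		do {
			have_caching_bg = false;
			for (int i = 0; i < n; i++) {
				if (!g[i].cached) {
					have_caching_bg = true;
					g[i].cached = true;
					continue;
				}
				if (g[i].has_space)
					return i;
			}
		} while (have_caching_bg);
		return -1;
	}

	int main(void)
	{
		struct bg g[2] = { { true, false }, { false, true } };
		printf("found group %d\n", search(g, 2)); /* 1, on the second pass */
		return 0;
	}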
| @@ -5326,7 +5498,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | |||
| 5326 | int index = 0; | 5498 | int index = 0; |
| 5327 | 5499 | ||
| 5328 | spin_lock(&info->lock); | 5500 | spin_lock(&info->lock); |
| 5329 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", | 5501 | printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n", |
| 5502 | (unsigned long long)info->flags, | ||
| 5330 | (unsigned long long)(info->total_bytes - info->bytes_used - | 5503 | (unsigned long long)(info->total_bytes - info->bytes_used - |
| 5331 | info->bytes_pinned - info->bytes_reserved - | 5504 | info->bytes_pinned - info->bytes_reserved - |
| 5332 | info->bytes_readonly), | 5505 | info->bytes_readonly), |
| @@ -5412,7 +5585,8 @@ again: | |||
| 5412 | return ret; | 5585 | return ret; |
| 5413 | } | 5586 | } |
| 5414 | 5587 | ||
| 5415 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | 5588 | static int __btrfs_free_reserved_extent(struct btrfs_root *root, |
| 5589 | u64 start, u64 len, int pin) | ||
| 5416 | { | 5590 | { |
| 5417 | struct btrfs_block_group_cache *cache; | 5591 | struct btrfs_block_group_cache *cache; |
| 5418 | int ret = 0; | 5592 | int ret = 0; |
| @@ -5427,8 +5601,12 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
| 5427 | if (btrfs_test_opt(root, DISCARD)) | 5601 | if (btrfs_test_opt(root, DISCARD)) |
| 5428 | ret = btrfs_discard_extent(root, start, len, NULL); | 5602 | ret = btrfs_discard_extent(root, start, len, NULL); |
| 5429 | 5603 | ||
| 5430 | btrfs_add_free_space(cache, start, len); | 5604 | if (pin) |
| 5431 | btrfs_update_reserved_bytes(cache, len, 0, 1); | 5605 | pin_down_extent(root, cache, start, len, 1); |
| 5606 | else { | ||
| 5607 | btrfs_add_free_space(cache, start, len); | ||
| 5608 | btrfs_update_reserved_bytes(cache, len, RESERVE_FREE); | ||
| 5609 | } | ||
| 5432 | btrfs_put_block_group(cache); | 5610 | btrfs_put_block_group(cache); |
| 5433 | 5611 | ||
| 5434 | trace_btrfs_reserved_extent_free(root, start, len); | 5612 | trace_btrfs_reserved_extent_free(root, start, len); |
| @@ -5436,6 +5614,18 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
| 5436 | return ret; | 5614 | return ret; |
| 5437 | } | 5615 | } |
| 5438 | 5616 | ||
| 5617 | int btrfs_free_reserved_extent(struct btrfs_root *root, | ||
| 5618 | u64 start, u64 len) | ||
| 5619 | { | ||
| 5620 | return __btrfs_free_reserved_extent(root, start, len, 0); | ||
| 5621 | } | ||
| 5622 | |||
| 5623 | int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, | ||
| 5624 | u64 start, u64 len) | ||
| 5625 | { | ||
| 5626 | return __btrfs_free_reserved_extent(root, start, len, 1); | ||
| 5627 | } | ||
| 5628 | |||
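Editor's note: the split into __btrfs_free_reserved_extent() plus two one-line wrappers is a standard kernel refactor — one static worker parameterized by a flag, two descriptively named entry points. In miniature (all names here are hypothetical):

	#include <stdio.h>

	/* Worker: 'pin' decides whether the range stays unusable until commit
	 * or goes straight back to the free pool. */
	static int __release_extent(unsigned long long start,
				    unsigned long long len, int pin)
	{
		printf("%s %llu+%llu\n", pin ? "pinning" : "freeing", start, len);
		return 0;
	}

	int release_extent(unsigned long long start, unsigned long long len)
	{
		return __release_extent(start, len, 0);
	}

	int release_and_pin_extent(unsigned long long start, unsigned long long len)
	{
		return __release_extent(start, len, 1);
	}

	int main(void)
	{
		release_extent(4096, 4096);
		release_and_pin_extent(8192, 4096);
		return 0;
	}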
| 5439 | static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | 5629 | static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, |
| 5440 | struct btrfs_root *root, | 5630 | struct btrfs_root *root, |
| 5441 | u64 parent, u64 root_objectid, | 5631 | u64 parent, u64 root_objectid, |
| @@ -5631,7 +5821,8 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
| 5631 | put_caching_control(caching_ctl); | 5821 | put_caching_control(caching_ctl); |
| 5632 | } | 5822 | } |
| 5633 | 5823 | ||
| 5634 | ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1); | 5824 | ret = btrfs_update_reserved_bytes(block_group, ins->offset, |
| 5825 | RESERVE_ALLOC_NO_ACCOUNT); | ||
| 5635 | BUG_ON(ret); | 5826 | BUG_ON(ret); |
| 5636 | btrfs_put_block_group(block_group); | 5827 | btrfs_put_block_group(block_group); |
| 5637 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 5828 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
| @@ -5688,8 +5879,7 @@ use_block_rsv(struct btrfs_trans_handle *trans, | |||
| 5688 | block_rsv = get_block_rsv(trans, root); | 5879 | block_rsv = get_block_rsv(trans, root); |
| 5689 | 5880 | ||
| 5690 | if (block_rsv->size == 0) { | 5881 | if (block_rsv->size == 0) { |
| 5691 | ret = reserve_metadata_bytes(trans, root, block_rsv, | 5882 | ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0); |
| 5692 | blocksize, 0); | ||
| 5693 | /* | 5883 | /* |
| 5694 | * If we couldn't reserve metadata bytes try and use some from | 5884 | * If we couldn't reserve metadata bytes try and use some from |
| 5695 | * the global reserve. | 5885 | * the global reserve. |
| @@ -5709,13 +5899,15 @@ use_block_rsv(struct btrfs_trans_handle *trans, | |||
| 5709 | if (!ret) | 5899 | if (!ret) |
| 5710 | return block_rsv; | 5900 | return block_rsv; |
| 5711 | if (ret) { | 5901 | if (ret) { |
| 5712 | WARN_ON(1); | 5902 | static DEFINE_RATELIMIT_STATE(_rs, |
| 5713 | ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize, | 5903 | DEFAULT_RATELIMIT_INTERVAL, |
| 5714 | 0); | 5904 | /*DEFAULT_RATELIMIT_BURST*/ 2); |
| 5905 | if (__ratelimit(&_rs)) { | ||
| 5906 | printk(KERN_DEBUG "btrfs: block rsv returned %d\n", ret); | ||
| 5907 | WARN_ON(1); | ||
| 5908 | } | ||
| 5909 | ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0); | ||
| 5715 | if (!ret) { | 5910 | if (!ret) { |
| 5716 | spin_lock(&block_rsv->lock); | ||
| 5717 | block_rsv->size += blocksize; | ||
| 5718 | spin_unlock(&block_rsv->lock); | ||
| 5719 | return block_rsv; | 5911 | return block_rsv; |
| 5720 | } else if (ret && block_rsv != global_rsv) { | 5912 | } else if (ret && block_rsv != global_rsv) { |
| 5721 | ret = block_rsv_use_bytes(global_rsv, blocksize); | 5913 | ret = block_rsv_use_bytes(global_rsv, blocksize); |
| @@ -6593,12 +6785,9 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force) | |||
| 6593 | cache->bytes_super - btrfs_block_group_used(&cache->item); | 6785 | cache->bytes_super - btrfs_block_group_used(&cache->item); |
| 6594 | 6786 | ||
| 6595 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + | 6787 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + |
| 6596 | sinfo->bytes_may_use + sinfo->bytes_readonly + | 6788 | sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes + |
| 6597 | cache->reserved_pinned + num_bytes + min_allocable_bytes <= | 6789 | min_allocable_bytes <= sinfo->total_bytes) { |
| 6598 | sinfo->total_bytes) { | ||
| 6599 | sinfo->bytes_readonly += num_bytes; | 6790 | sinfo->bytes_readonly += num_bytes; |
| 6600 | sinfo->bytes_reserved += cache->reserved_pinned; | ||
| 6601 | cache->reserved_pinned = 0; | ||
| 6602 | cache->ro = 1; | 6791 | cache->ro = 1; |
| 6603 | ret = 0; | 6792 | ret = 0; |
| 6604 | } | 6793 | } |
| @@ -6965,7 +7154,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
| 6965 | struct btrfs_space_info, | 7154 | struct btrfs_space_info, |
| 6966 | list); | 7155 | list); |
| 6967 | if (space_info->bytes_pinned > 0 || | 7156 | if (space_info->bytes_pinned > 0 || |
| 6968 | space_info->bytes_reserved > 0) { | 7157 | space_info->bytes_reserved > 0 || |
| 7158 | space_info->bytes_may_use > 0) { | ||
| 6969 | WARN_ON(1); | 7159 | WARN_ON(1); |
| 6970 | dump_space_info(space_info, 0, 0); | 7160 | dump_space_info(space_info, 0, 0); |
| 6971 | } | 7161 | } |
| @@ -7007,14 +7197,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
| 7007 | return -ENOMEM; | 7197 | return -ENOMEM; |
| 7008 | path->reada = 1; | 7198 | path->reada = 1; |
| 7009 | 7199 | ||
| 7010 | cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); | 7200 | cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy); |
| 7011 | if (cache_gen != 0 && | 7201 | if (btrfs_test_opt(root, SPACE_CACHE) && |
| 7012 | btrfs_super_generation(&root->fs_info->super_copy) != cache_gen) | 7202 | btrfs_super_generation(root->fs_info->super_copy) != cache_gen) |
| 7013 | need_clear = 1; | 7203 | need_clear = 1; |
| 7014 | if (btrfs_test_opt(root, CLEAR_CACHE)) | 7204 | if (btrfs_test_opt(root, CLEAR_CACHE)) |
| 7015 | need_clear = 1; | 7205 | need_clear = 1; |
| 7016 | if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen) | ||
| 7017 | printk(KERN_INFO "btrfs: disk space caching is enabled\n"); | ||
| 7018 | 7206 | ||
| 7019 | while (1) { | 7207 | while (1) { |
| 7020 | ret = find_first_block_group(root, path, &key); | 7208 | ret = find_first_block_group(root, path, &key); |
| @@ -7253,7 +7441,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
| 7253 | goto out; | 7441 | goto out; |
| 7254 | } | 7442 | } |
| 7255 | 7443 | ||
| 7256 | inode = lookup_free_space_inode(root, block_group, path); | 7444 | inode = lookup_free_space_inode(tree_root, block_group, path); |
| 7257 | if (!IS_ERR(inode)) { | 7445 | if (!IS_ERR(inode)) { |
| 7258 | ret = btrfs_orphan_add(trans, inode); | 7446 | ret = btrfs_orphan_add(trans, inode); |
| 7259 | BUG_ON(ret); | 7447 | BUG_ON(ret); |
| @@ -7269,7 +7457,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
| 7269 | spin_unlock(&block_group->lock); | 7457 | spin_unlock(&block_group->lock); |
| 7270 | } | 7458 | } |
| 7271 | /* One for our lookup ref */ | 7459 | /* One for our lookup ref */ |
| 7272 | iput(inode); | 7460 | btrfs_add_delayed_iput(inode); |
| 7273 | } | 7461 | } |
| 7274 | 7462 | ||
| 7275 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | 7463 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; |
| @@ -7340,7 +7528,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info) | |||
| 7340 | int mixed = 0; | 7528 | int mixed = 0; |
| 7341 | int ret; | 7529 | int ret; |
| 7342 | 7530 | ||
| 7343 | disk_super = &fs_info->super_copy; | 7531 | disk_super = fs_info->super_copy; |
| 7344 | if (!btrfs_super_root(disk_super)) | 7532 | if (!btrfs_super_root(disk_super)) |
| 7345 | return 1; | 7533 | return 1; |
| 7346 | 7534 | ||
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d418164a35f1..1f87c4d0e7a0 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include "compat.h" | 17 | #include "compat.h" |
| 18 | #include "ctree.h" | 18 | #include "ctree.h" |
| 19 | #include "btrfs_inode.h" | 19 | #include "btrfs_inode.h" |
| 20 | #include "volumes.h" | ||
| 20 | 21 | ||
| 21 | static struct kmem_cache *extent_state_cache; | 22 | static struct kmem_cache *extent_state_cache; |
| 22 | static struct kmem_cache *extent_buffer_cache; | 23 | static struct kmem_cache *extent_buffer_cache; |
| @@ -894,6 +895,194 @@ search_again: | |||
| 894 | goto again; | 895 | goto again; |
| 895 | } | 896 | } |
| 896 | 897 | ||
| 898 | /** | ||
| 899 | * convert_extent_bit - convert all bits in a given range from one bit to another | ||
| 900 | * @tree: the io tree to search | ||
| 901 | * @start: the start offset in bytes | ||
| 902 | * @end: the end offset in bytes (inclusive) | ||
| 903 | * @bits: the bits to set in this range | ||
| 904 | * @clear_bits: the bits to clear in this range | ||
| 905 | * @mask: the allocation mask | ||
| 906 | * | ||
| 907 | * This will go through and set bits for the given range. If any states exist | ||
| 908 | * already in this range they are set with the given bit and cleared of the | ||
| 909 | * already in this range, they are set with the given bit and cleared of the | ||
| 910 | * clear_bits. This is only meant to be used by things that are mergeable, i.e. | ||
| 911 | * boundary bits like LOCK. | ||
| 912 | */ | ||
| 913 | int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 914 | int bits, int clear_bits, gfp_t mask) | ||
| 915 | { | ||
| 916 | struct extent_state *state; | ||
| 917 | struct extent_state *prealloc = NULL; | ||
| 918 | struct rb_node *node; | ||
| 919 | int err = 0; | ||
| 920 | u64 last_start; | ||
| 921 | u64 last_end; | ||
| 922 | |||
| 923 | again: | ||
| 924 | if (!prealloc && (mask & __GFP_WAIT)) { | ||
| 925 | prealloc = alloc_extent_state(mask); | ||
| 926 | if (!prealloc) | ||
| 927 | return -ENOMEM; | ||
| 928 | } | ||
| 929 | |||
| 930 | spin_lock(&tree->lock); | ||
| 931 | /* | ||
| 932 | * this search will find all the extents that end after | ||
| 933 | * our range starts. | ||
| 934 | */ | ||
| 935 | node = tree_search(tree, start); | ||
| 936 | if (!node) { | ||
| 937 | prealloc = alloc_extent_state_atomic(prealloc); | ||
| 938 | if (!prealloc) | ||
| 939 | return -ENOMEM; | ||
| 940 | err = insert_state(tree, prealloc, start, end, &bits); | ||
| 941 | prealloc = NULL; | ||
| 942 | BUG_ON(err == -EEXIST); | ||
| 943 | goto out; | ||
| 944 | } | ||
| 945 | state = rb_entry(node, struct extent_state, rb_node); | ||
| 946 | hit_next: | ||
| 947 | last_start = state->start; | ||
| 948 | last_end = state->end; | ||
| 949 | |||
| 950 | /* | ||
| 951 | * | ---- desired range ---- | | ||
| 952 | * | state | | ||
| 953 | * | ||
| 954 | * Just process what we found and keep going | ||
| 955 | */ | ||
| 956 | if (state->start == start && state->end <= end) { | ||
| 957 | struct rb_node *next_node; | ||
| 958 | |||
| 959 | set_state_bits(tree, state, &bits); | ||
| 960 | clear_state_bit(tree, state, &clear_bits, 0); | ||
| 961 | |||
| 962 | merge_state(tree, state); | ||
| 963 | if (last_end == (u64)-1) | ||
| 964 | goto out; | ||
| 965 | |||
| 966 | start = last_end + 1; | ||
| 967 | next_node = rb_next(&state->rb_node); | ||
| 968 | if (next_node && start < end && prealloc && !need_resched()) { | ||
| 969 | state = rb_entry(next_node, struct extent_state, | ||
| 970 | rb_node); | ||
| 971 | if (state->start == start) | ||
| 972 | goto hit_next; | ||
| 973 | } | ||
| 974 | goto search_again; | ||
| 975 | } | ||
| 976 | |||
| 977 | /* | ||
| 978 | * | ---- desired range ---- | | ||
| 979 | * | state | | ||
| 980 | * or | ||
| 981 | * | ------------- state -------------- | | ||
| 982 | * | ||
| 983 | * We need to split the extent we found, and may flip bits on | ||
| 984 | * second half. | ||
| 985 | * | ||
| 986 | * If the extent we found extends past our | ||
| 987 | * range, we just split and search again. It'll get split | ||
| 988 | * again the next time though. | ||
| 989 | * | ||
| 990 | * If the extent we found is inside our range, we set the | ||
| 991 | * desired bit on it. | ||
| 992 | */ | ||
| 993 | if (state->start < start) { | ||
| 994 | prealloc = alloc_extent_state_atomic(prealloc); | ||
| 995 | if (!prealloc) | ||
| 996 | return -ENOMEM; | ||
| 997 | err = split_state(tree, state, prealloc, start); | ||
| 998 | BUG_ON(err == -EEXIST); | ||
| 999 | prealloc = NULL; | ||
| 1000 | if (err) | ||
| 1001 | goto out; | ||
| 1002 | if (state->end <= end) { | ||
| 1003 | set_state_bits(tree, state, &bits); | ||
| 1004 | clear_state_bit(tree, state, &clear_bits, 0); | ||
| 1005 | merge_state(tree, state); | ||
| 1006 | if (last_end == (u64)-1) | ||
| 1007 | goto out; | ||
| 1008 | start = last_end + 1; | ||
| 1009 | } | ||
| 1010 | goto search_again; | ||
| 1011 | } | ||
| 1012 | /* | ||
| 1013 | * | ---- desired range ---- | | ||
| 1014 | * | state | or | state | | ||
| 1015 | * | ||
| 1016 | * There's a hole, we need to insert something in it and | ||
| 1017 | * ignore the extent we found. | ||
| 1018 | */ | ||
| 1019 | if (state->start > start) { | ||
| 1020 | u64 this_end; | ||
| 1021 | if (end < last_start) | ||
| 1022 | this_end = end; | ||
| 1023 | else | ||
| 1024 | this_end = last_start - 1; | ||
| 1025 | |||
| 1026 | prealloc = alloc_extent_state_atomic(prealloc); | ||
| 1027 | if (!prealloc) | ||
| 1028 | return -ENOMEM; | ||
| 1029 | |||
| 1030 | /* | ||
| 1031 | * Avoid freeing 'prealloc' if it can be merged with | ||
| 1032 | * the later extent. | ||
| 1033 | */ | ||
| 1034 | err = insert_state(tree, prealloc, start, this_end, | ||
| 1035 | &bits); | ||
| 1036 | BUG_ON(err == -EEXIST); | ||
| 1037 | if (err) { | ||
| 1038 | free_extent_state(prealloc); | ||
| 1039 | prealloc = NULL; | ||
| 1040 | goto out; | ||
| 1041 | } | ||
| 1042 | prealloc = NULL; | ||
| 1043 | start = this_end + 1; | ||
| 1044 | goto search_again; | ||
| 1045 | } | ||
| 1046 | /* | ||
| 1047 | * | ---- desired range ---- | | ||
| 1048 | * | state | | ||
| 1049 | * We need to split the extent, and set the bit | ||
| 1050 | * on the first half | ||
| 1051 | */ | ||
| 1052 | if (state->start <= end && state->end > end) { | ||
| 1053 | prealloc = alloc_extent_state_atomic(prealloc); | ||
| 1054 | if (!prealloc) | ||
| 1055 | return -ENOMEM; | ||
| 1056 | |||
| 1057 | err = split_state(tree, state, prealloc, end + 1); | ||
| 1058 | BUG_ON(err == -EEXIST); | ||
| 1059 | |||
| 1060 | set_state_bits(tree, prealloc, &bits); | ||
| 1061 | clear_state_bit(tree, prealloc, &clear_bits, 0); | ||
| 1062 | |||
| 1063 | merge_state(tree, prealloc); | ||
| 1064 | prealloc = NULL; | ||
| 1065 | goto out; | ||
| 1066 | } | ||
| 1067 | |||
| 1068 | goto search_again; | ||
| 1069 | |||
| 1070 | out: | ||
| 1071 | spin_unlock(&tree->lock); | ||
| 1072 | if (prealloc) | ||
| 1073 | free_extent_state(prealloc); | ||
| 1074 | |||
| 1075 | return err; | ||
| 1076 | |||
| 1077 | search_again: | ||
| 1078 | if (start > end) | ||
| 1079 | goto out; | ||
| 1080 | spin_unlock(&tree->lock); | ||
| 1081 | if (mask & __GFP_WAIT) | ||
| 1082 | cond_resched(); | ||
| 1083 | goto again; | ||
| 1084 | } | ||
| 1085 | |||
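Editor's sketch: a hypothetical call site for the new helper, following its kernel-doc — the tree and range below are placeholders. It atomically sets EXTENT_DIRTY while clearing EXTENT_DELALLOC over one range, under GFP_NOFS as usual in writeback paths:

	err = convert_extent_bit(&BTRFS_I(inode)->io_tree, start, end,
				 EXTENT_DIRTY, EXTENT_DELALLOC, GFP_NOFS);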
| 897 | /* wrappers around set/clear extent bit */ | 1086 | /* wrappers around set/clear extent bit */ |
| 898 | int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | 1087 | int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, |
| 899 | gfp_t mask) | 1088 | gfp_t mask) |
| @@ -919,7 +1108,7 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 919 | struct extent_state **cached_state, gfp_t mask) | 1108 | struct extent_state **cached_state, gfp_t mask) |
| 920 | { | 1109 | { |
| 921 | return set_extent_bit(tree, start, end, | 1110 | return set_extent_bit(tree, start, end, |
| 922 | EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE, | 1111 | EXTENT_DELALLOC | EXTENT_UPTODATE, |
| 923 | 0, NULL, cached_state, mask); | 1112 | 0, NULL, cached_state, mask); |
| 924 | } | 1113 | } |
| 925 | 1114 | ||
| @@ -1599,6 +1788,368 @@ static int check_page_writeback(struct extent_io_tree *tree, | |||
| 1599 | return 0; | 1788 | return 0; |
| 1600 | } | 1789 | } |
| 1601 | 1790 | ||
| 1791 | /* | ||
| 1792 | * When IO fails, either with EIO or csum verification fails, we | ||
| 1793 | * try other mirrors that might have a good copy of the data. This | ||
| 1794 | * io_failure_record is used to record state as we go through all the | ||
| 1795 | * mirrors. If another mirror has good data, the page is set up to date | ||
| 1796 | * and things continue. If a good mirror can't be found, the original | ||
| 1797 | * bio end_io callback is called to indicate things have failed. | ||
| 1798 | */ | ||
| 1799 | struct io_failure_record { | ||
| 1800 | struct page *page; | ||
| 1801 | u64 start; | ||
| 1802 | u64 len; | ||
| 1803 | u64 logical; | ||
| 1804 | unsigned long bio_flags; | ||
| 1805 | int this_mirror; | ||
| 1806 | int failed_mirror; | ||
| 1807 | int in_validation; | ||
| 1808 | }; | ||
| 1809 | |||
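Editor's note: worth seeing how this record is persisted. The failure tree carries it as the opaque u64 'private' of an extent state, so both directions cast through unsigned long, exactly as the hunks below do (sketch distilled from the patch, not a new API):

	/* store */
	set_state_private(failure_tree, start, (u64)(unsigned long)failrec);
	/* retrieve */
	ret = get_state_private(failure_tree, start, &private);
	failrec = (struct io_failure_record *)(unsigned long)private;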
| 1810 | static int free_io_failure(struct inode *inode, struct io_failure_record *rec, | ||
| 1811 | int did_repair) | ||
| 1812 | { | ||
| 1813 | int ret; | ||
| 1814 | int err = 0; | ||
| 1815 | struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; | ||
| 1816 | |||
| 1817 | set_state_private(failure_tree, rec->start, 0); | ||
| 1818 | ret = clear_extent_bits(failure_tree, rec->start, | ||
| 1819 | rec->start + rec->len - 1, | ||
| 1820 | EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS); | ||
| 1821 | if (ret) | ||
| 1822 | err = ret; | ||
| 1823 | |||
| 1824 | if (did_repair) { | ||
| 1825 | ret = clear_extent_bits(&BTRFS_I(inode)->io_tree, rec->start, | ||
| 1826 | rec->start + rec->len - 1, | ||
| 1827 | EXTENT_DAMAGED, GFP_NOFS); | ||
| 1828 | if (ret && !err) | ||
| 1829 | err = ret; | ||
| 1830 | } | ||
| 1831 | |||
| 1832 | kfree(rec); | ||
| 1833 | return err; | ||
| 1834 | } | ||
| 1835 | |||
| 1836 | static void repair_io_failure_callback(struct bio *bio, int err) | ||
| 1837 | { | ||
| 1838 | complete(bio->bi_private); | ||
| 1839 | } | ||
| 1840 | |||
| 1841 | /* | ||
| 1842 | * this bypasses the standard btrfs submit functions deliberately, as | ||
| 1843 | * the standard behavior is to write all copies in a raid setup. here we only | ||
| 1844 | * want to write the one bad copy. so we do the mapping for ourselves and issue | ||
| 1845 | * submit_bio directly. | ||
| 1846 | * to avoid any synchronization issues, wait for the data after writing, which | ||
| 1847 | * actually prevents the read that triggered the error from finishing. | ||
| 1848 | * currently, there can be no more than two copies of every data bit. thus, | ||
| 1849 | * exactly one rewrite is required. | ||
| 1850 | */ | ||
| 1851 | int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, | ||
| 1852 | u64 length, u64 logical, struct page *page, | ||
| 1853 | int mirror_num) | ||
| 1854 | { | ||
| 1855 | struct bio *bio; | ||
| 1856 | struct btrfs_device *dev; | ||
| 1857 | DECLARE_COMPLETION_ONSTACK(compl); | ||
| 1858 | u64 map_length = 0; | ||
| 1859 | u64 sector; | ||
| 1860 | struct btrfs_bio *bbio = NULL; | ||
| 1861 | int ret; | ||
| 1862 | |||
| 1863 | BUG_ON(!mirror_num); | ||
| 1864 | |||
| 1865 | bio = bio_alloc(GFP_NOFS, 1); | ||
| 1866 | if (!bio) | ||
| 1867 | return -EIO; | ||
| 1868 | bio->bi_private = &compl; | ||
| 1869 | bio->bi_end_io = repair_io_failure_callback; | ||
| 1870 | bio->bi_size = 0; | ||
| 1871 | map_length = length; | ||
| 1872 | |||
| 1873 | ret = btrfs_map_block(map_tree, WRITE, logical, | ||
| 1874 | &map_length, &bbio, mirror_num); | ||
| 1875 | if (ret) { | ||
| 1876 | bio_put(bio); | ||
| 1877 | return -EIO; | ||
| 1878 | } | ||
| 1879 | BUG_ON(mirror_num != bbio->mirror_num); | ||
| 1880 | sector = bbio->stripes[mirror_num-1].physical >> 9; | ||
| 1881 | bio->bi_sector = sector; | ||
| 1882 | dev = bbio->stripes[mirror_num-1].dev; | ||
| 1883 | kfree(bbio); | ||
| 1884 | if (!dev || !dev->bdev || !dev->writeable) { | ||
| 1885 | bio_put(bio); | ||
| 1886 | return -EIO; | ||
| 1887 | } | ||
| 1888 | bio->bi_bdev = dev->bdev; | ||
| 1889 | bio_add_page(bio, page, length, start-page_offset(page)); | ||
| 1890 | submit_bio(WRITE_SYNC, bio); | ||
| 1891 | wait_for_completion(&compl); | ||
| 1892 | |||
| 1893 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { | ||
| 1894 | /* try to remap that extent elsewhere? */ | ||
| 1895 | bio_put(bio); | ||
| 1896 | return -EIO; | ||
| 1897 | } | ||
| 1898 | |||
| 1899 | printk(KERN_INFO "btrfs read error corrected: ino %lu off %llu (dev %s " | ||
| 1900 | "sector %llu)\n", page->mapping->host->i_ino, start, | ||
| 1901 | dev->name, sector); | ||
| 1902 | |||
| 1903 | bio_put(bio); | ||
| 1904 | return 0; | ||
| 1905 | } | ||
| 1906 | |||
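Editor's note: repair_io_failure() above relies on the standard on-stack completion idiom to make a single bio write synchronous. Distilled (kernel context, illustrative fragment; my_end_io is a placeholder name):

	DECLARE_COMPLETION_ONSTACK(compl);

	bio->bi_private = &compl;
	bio->bi_end_io = my_end_io;     /* end_io just calls complete(bio->bi_private) */
	submit_bio(WRITE_SYNC, bio);
	wait_for_completion(&compl);    /* returns once the one write has ended */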
| 1907 | /* | ||
| 1908 | * each time an IO finishes, we do a fast check in the IO failure tree | ||
| 1909 | * to see if we need to process or clean up an io_failure_record | ||
| 1910 | */ | ||
| 1911 | static int clean_io_failure(u64 start, struct page *page) | ||
| 1912 | { | ||
| 1913 | u64 private; | ||
| 1914 | u64 private_failure; | ||
| 1915 | struct io_failure_record *failrec; | ||
| 1916 | struct btrfs_mapping_tree *map_tree; | ||
| 1917 | struct extent_state *state; | ||
| 1918 | int num_copies; | ||
| 1919 | int did_repair = 0; | ||
| 1920 | int ret; | ||
| 1921 | struct inode *inode = page->mapping->host; | ||
| 1922 | |||
| 1923 | private = 0; | ||
| 1924 | ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, | ||
| 1925 | (u64)-1, 1, EXTENT_DIRTY, 0); | ||
| 1926 | if (!ret) | ||
| 1927 | return 0; | ||
| 1928 | |||
| 1929 | ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, start, | ||
| 1930 | &private_failure); | ||
| 1931 | if (ret) | ||
| 1932 | return 0; | ||
| 1933 | |||
| 1934 | failrec = (struct io_failure_record *)(unsigned long) private_failure; | ||
| 1935 | BUG_ON(!failrec->this_mirror); | ||
| 1936 | |||
| 1937 | if (failrec->in_validation) { | ||
| 1938 | /* there was no real error, just free the record */ | ||
| 1939 | pr_debug("clean_io_failure: freeing dummy error at %llu\n", | ||
| 1940 | failrec->start); | ||
| 1941 | did_repair = 1; | ||
| 1942 | goto out; | ||
| 1943 | } | ||
| 1944 | |||
| 1945 | spin_lock(&BTRFS_I(inode)->io_tree.lock); | ||
| 1946 | state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree, | ||
| 1947 | failrec->start, | ||
| 1948 | EXTENT_LOCKED); | ||
| 1949 | spin_unlock(&BTRFS_I(inode)->io_tree.lock); | ||
| 1950 | |||
| 1951 | if (state && state->start == failrec->start) { | ||
| 1952 | map_tree = &BTRFS_I(inode)->root->fs_info->mapping_tree; | ||
| 1953 | num_copies = btrfs_num_copies(map_tree, failrec->logical, | ||
| 1954 | failrec->len); | ||
| 1955 | if (num_copies > 1) { | ||
| 1956 | ret = repair_io_failure(map_tree, start, failrec->len, | ||
| 1957 | failrec->logical, page, | ||
| 1958 | failrec->failed_mirror); | ||
| 1959 | did_repair = !ret; | ||
| 1960 | } | ||
| 1961 | } | ||
| 1962 | |||
| 1963 | out: | ||
| 1964 | if (!ret) | ||
| 1965 | ret = free_io_failure(inode, failrec, did_repair); | ||
| 1966 | |||
| 1967 | return ret; | ||
| 1968 | } | ||
| 1969 | |||
| 1970 | /* | ||
| 1971 | * this is a generic handler for readpage errors (default | ||
| 1972 | * readpage_io_failed_hook). if other copies exist, read those and write back | ||
| 1973 | * good data to the failed position. does not investigate in remapping the | ||
| 1974 | * failed extent elsewhere, hoping the device will be smart enough to do this as | ||
| 1975 | * needed | ||
| 1976 | */ | ||
| 1977 | |||
| 1978 | static int bio_readpage_error(struct bio *failed_bio, struct page *page, | ||
| 1979 | u64 start, u64 end, int failed_mirror, | ||
| 1980 | struct extent_state *state) | ||
| 1981 | { | ||
| 1982 | struct io_failure_record *failrec = NULL; | ||
| 1983 | u64 private; | ||
| 1984 | struct extent_map *em; | ||
| 1985 | struct inode *inode = page->mapping->host; | ||
| 1986 | struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; | ||
| 1987 | struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; | ||
| 1988 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 1989 | struct bio *bio; | ||
| 1990 | int num_copies; | ||
| 1991 | int ret; | ||
| 1992 | int read_mode; | ||
| 1993 | u64 logical; | ||
| 1994 | |||
| 1995 | BUG_ON(failed_bio->bi_rw & REQ_WRITE); | ||
| 1996 | |||
| 1997 | ret = get_state_private(failure_tree, start, &private); | ||
| 1998 | if (ret) { | ||
| 1999 | failrec = kzalloc(sizeof(*failrec), GFP_NOFS); | ||
| 2000 | if (!failrec) | ||
| 2001 | return -ENOMEM; | ||
| 2002 | failrec->start = start; | ||
| 2003 | failrec->len = end - start + 1; | ||
| 2004 | failrec->this_mirror = 0; | ||
| 2005 | failrec->bio_flags = 0; | ||
| 2006 | failrec->in_validation = 0; | ||
| 2007 | |||
| 2008 | read_lock(&em_tree->lock); | ||
| 2009 | em = lookup_extent_mapping(em_tree, start, failrec->len); | ||
| 2010 | if (!em) { | ||
| 2011 | read_unlock(&em_tree->lock); | ||
| 2012 | kfree(failrec); | ||
| 2013 | return -EIO; | ||
| 2014 | } | ||
| 2015 | |||
| 2016 | if (em->start > start || em->start + em->len < start) { | ||
| 2017 | free_extent_map(em); | ||
| 2018 | em = NULL; | ||
| 2019 | } | ||
| 2020 | read_unlock(&em_tree->lock); | ||
| 2021 | |||
| 2022 | if (!em || IS_ERR(em)) { | ||
| 2023 | kfree(failrec); | ||
| 2024 | return -EIO; | ||
| 2025 | } | ||
| 2026 | logical = start - em->start; | ||
| 2027 | logical = em->block_start + logical; | ||
| 2028 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { | ||
| 2029 | logical = em->block_start; | ||
| 2030 | failrec->bio_flags = EXTENT_BIO_COMPRESSED; | ||
| 2031 | extent_set_compress_type(&failrec->bio_flags, | ||
| 2032 | em->compress_type); | ||
| 2033 | } | ||
| 2034 | pr_debug("bio_readpage_error: (new) logical=%llu, start=%llu, " | ||
| 2035 | "len=%llu\n", logical, start, failrec->len); | ||
| 2036 | failrec->logical = logical; | ||
| 2037 | free_extent_map(em); | ||
| 2038 | |||
| 2039 | /* set the bits in the private failure tree */ | ||
| 2040 | ret = set_extent_bits(failure_tree, start, end, | ||
| 2041 | EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS); | ||
| 2042 | if (ret >= 0) | ||
| 2043 | ret = set_state_private(failure_tree, start, | ||
| 2044 | (u64)(unsigned long)failrec); | ||
| 2045 | /* set the bits in the inode's tree */ | ||
| 2046 | if (ret >= 0) | ||
| 2047 | ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED, | ||
| 2048 | GFP_NOFS); | ||
| 2049 | if (ret < 0) { | ||
| 2050 | kfree(failrec); | ||
| 2051 | return ret; | ||
| 2052 | } | ||
| 2053 | } else { | ||
| 2054 | failrec = (struct io_failure_record *)(unsigned long)private; | ||
| 2055 | pr_debug("bio_readpage_error: (found) logical=%llu, " | ||
| 2056 | "start=%llu, len=%llu, validation=%d\n", | ||
| 2057 | failrec->logical, failrec->start, failrec->len, | ||
| 2058 | failrec->in_validation); | ||
| 2059 | /* | ||
| 2060 | * when data can be on disk more than twice, add to failrec here | ||
| 2061 | * (e.g. with a list for failed_mirror) to make | ||
| 2062 | * clean_io_failure() clean all those errors at once. | ||
| 2063 | */ | ||
| 2064 | } | ||
| 2065 | num_copies = btrfs_num_copies( | ||
| 2066 | &BTRFS_I(inode)->root->fs_info->mapping_tree, | ||
| 2067 | failrec->logical, failrec->len); | ||
| 2068 | if (num_copies == 1) { | ||
| 2069 | /* | ||
| 2070 | * we only have a single copy of the data, so don't bother with | ||
| 2071 | * all the retry and error correction code that follows. no | ||
| 2072 | * matter what the error is, it is very likely to persist. | ||
| 2073 | */ | ||
| 2074 | pr_debug("bio_readpage_error: cannot repair, num_copies == 1. " | ||
| 2075 | "state=%p, num_copies=%d, next_mirror %d, " | ||
| 2076 | "failed_mirror %d\n", state, num_copies, | ||
| 2077 | failrec->this_mirror, failed_mirror); | ||
| 2078 | free_io_failure(inode, failrec, 0); | ||
| 2079 | return -EIO; | ||
| 2080 | } | ||
| 2081 | |||
| 2082 | if (!state) { | ||
| 2083 | spin_lock(&tree->lock); | ||
| 2084 | state = find_first_extent_bit_state(tree, failrec->start, | ||
| 2085 | EXTENT_LOCKED); | ||
| 2086 | if (state && state->start != failrec->start) | ||
| 2087 | state = NULL; | ||
| 2088 | spin_unlock(&tree->lock); | ||
| 2089 | } | ||
| 2090 | |||
| 2091 | /* | ||
| 2092 | * there are two premises: | ||
| 2093 | * a) deliver good data to the caller | ||
| 2094 | * b) correct the bad sectors on disk | ||
| 2095 | */ | ||
| 2096 | if (failed_bio->bi_vcnt > 1) { | ||
| 2097 | /* | ||
| 2098 | * to fulfill b), we need to know the exact failing sectors, as | ||
| 2099 | * we don't want to rewrite any more than the failed ones. thus, | ||
| 2100 | * we need separate read requests for the failed bio | ||
| 2101 | * | ||
| 2102 | * if the following BUG_ON triggers, our validation request got | ||
| 2103 | * merged. we need separate requests for our algorithm to work. | ||
| 2104 | */ | ||
| 2105 | BUG_ON(failrec->in_validation); | ||
| 2106 | failrec->in_validation = 1; | ||
| 2107 | failrec->this_mirror = failed_mirror; | ||
| 2108 | read_mode = READ_SYNC | REQ_FAILFAST_DEV; | ||
| 2109 | } else { | ||
| 2110 | /* | ||
| 2111 | * we're ready to fulfill a) and b) together. get a good copy | ||
| 2112 | * of the failed sector and if we succeed, we have setup | ||
| 2113 | * everything for repair_io_failure to do the rest for us. | ||
| 2114 | */ | ||
| 2115 | if (failrec->in_validation) { | ||
| 2116 | BUG_ON(failrec->this_mirror != failed_mirror); | ||
| 2117 | failrec->in_validation = 0; | ||
| 2118 | failrec->this_mirror = 0; | ||
| 2119 | } | ||
| 2120 | failrec->failed_mirror = failed_mirror; | ||
| 2121 | failrec->this_mirror++; | ||
| 2122 | if (failrec->this_mirror == failed_mirror) | ||
| 2123 | failrec->this_mirror++; | ||
| 2124 | read_mode = READ_SYNC; | ||
| 2125 | } | ||
| 2126 | |||
| 2127 | if (!state || failrec->this_mirror > num_copies) { | ||
| 2128 | pr_debug("bio_readpage_error: (fail) state=%p, num_copies=%d, " | ||
| 2129 | "next_mirror %d, failed_mirror %d\n", state, | ||
| 2130 | num_copies, failrec->this_mirror, failed_mirror); | ||
| 2131 | free_io_failure(inode, failrec, 0); | ||
| 2132 | return -EIO; | ||
| 2133 | } | ||
| 2134 | |||
| 2135 | bio = bio_alloc(GFP_NOFS, 1); | ||
| 2136 | bio->bi_private = state; | ||
| 2137 | bio->bi_end_io = failed_bio->bi_end_io; | ||
| 2138 | bio->bi_sector = failrec->logical >> 9; | ||
| 2139 | bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | ||
| 2140 | bio->bi_size = 0; | ||
| 2141 | |||
| 2142 | bio_add_page(bio, page, failrec->len, start - page_offset(page)); | ||
| 2143 | |||
| 2144 | pr_debug("bio_readpage_error: submitting new read[%#x] to " | ||
| 2145 | "this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode, | ||
| 2146 | failrec->this_mirror, num_copies, failrec->in_validation); | ||
| 2147 | |||
| 2148 | tree->ops->submit_bio_hook(inode, read_mode, bio, failrec->this_mirror, | ||
| 2149 | failrec->bio_flags, 0); | ||
| 2150 | return 0; | ||
| 2151 | } | ||
| 2152 | |||
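Editor's sketch: the mirror arithmetic at the heart of bio_readpage_error() is easy to check in isolation — this_mirror walks 1..num_copies and skips the mirror that already failed. A small user-space model (assumption: mirrors are numbered from 1, as in the patch):

	#include <stdio.h>

	/* Advance to the next candidate mirror, skipping the failed one;
	 * -1 means all copies have been tried. */
	static int next_mirror(int this_mirror, int failed_mirror, int num_copies)
	{
		this_mirror++;
		if (this_mirror == failed_mirror)
			this_mirror++;
		return this_mirror > num_copies ? -1 : this_mirror;
	}

	int main(void)
	{
		printf("%d\n", next_mirror(0, 1, 2)); /* 2: retry the other copy */
		printf("%d\n", next_mirror(2, 1, 2)); /* -1: give up, return -EIO */
		return 0;
	}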
| 1602 | /* lots and lots of room for performance fixes in the end_bio funcs */ | 2153 | /* lots and lots of room for performance fixes in the end_bio funcs */ |
| 1603 | 2154 | ||
| 1604 | /* | 2155 | /* |
| @@ -1697,6 +2248,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
| 1697 | struct extent_state *cached = NULL; | 2248 | struct extent_state *cached = NULL; |
| 1698 | struct extent_state *state; | 2249 | struct extent_state *state; |
| 1699 | 2250 | ||
| 2251 | pr_debug("end_bio_extent_readpage: bi_vcnt=%d, idx=%d, err=%d, " | ||
| 2252 | "mirror=%ld\n", bio->bi_vcnt, bio->bi_idx, err, | ||
| 2253 | (long int)bio->bi_bdev); | ||
| 1700 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 2254 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
| 1701 | 2255 | ||
| 1702 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + | 2256 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + |
| @@ -1727,11 +2281,19 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
| 1727 | state); | 2281 | state); |
| 1728 | if (ret) | 2282 | if (ret) |
| 1729 | uptodate = 0; | 2283 | uptodate = 0; |
| 2284 | else | ||
| 2285 | clean_io_failure(start, page); | ||
| 1730 | } | 2286 | } |
| 1731 | if (!uptodate && tree->ops && | 2287 | if (!uptodate) { |
| 1732 | tree->ops->readpage_io_failed_hook) { | 2288 | u64 failed_mirror; |
| 1733 | ret = tree->ops->readpage_io_failed_hook(bio, page, | 2289 | failed_mirror = (u64)bio->bi_bdev; |
| 1734 | start, end, NULL); | 2290 | if (tree->ops && tree->ops->readpage_io_failed_hook) |
| 2291 | ret = tree->ops->readpage_io_failed_hook( | ||
| 2292 | bio, page, start, end, | ||
| 2293 | failed_mirror, state); | ||
| 2294 | else | ||
| 2295 | ret = bio_readpage_error(bio, page, start, end, | ||
| 2296 | failed_mirror, NULL); | ||
| 1735 | if (ret == 0) { | 2297 | if (ret == 0) { |
| 1736 | uptodate = | 2298 | uptodate = |
| 1737 | test_bit(BIO_UPTODATE, &bio->bi_flags); | 2299 | test_bit(BIO_UPTODATE, &bio->bi_flags); |
| @@ -1811,6 +2373,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num, | |||
| 1811 | mirror_num, bio_flags, start); | 2373 | mirror_num, bio_flags, start); |
| 1812 | else | 2374 | else |
| 1813 | submit_bio(rw, bio); | 2375 | submit_bio(rw, bio); |
| 2376 | |||
| 1814 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) | 2377 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) |
| 1815 | ret = -EOPNOTSUPP; | 2378 | ret = -EOPNOTSUPP; |
| 1816 | bio_put(bio); | 2379 | bio_put(bio); |
| @@ -2076,16 +2639,16 @@ out: | |||
| 2076 | } | 2639 | } |
| 2077 | 2640 | ||
| 2078 | int extent_read_full_page(struct extent_io_tree *tree, struct page *page, | 2641 | int extent_read_full_page(struct extent_io_tree *tree, struct page *page, |
| 2079 | get_extent_t *get_extent) | 2642 | get_extent_t *get_extent, int mirror_num) |
| 2080 | { | 2643 | { |
| 2081 | struct bio *bio = NULL; | 2644 | struct bio *bio = NULL; |
| 2082 | unsigned long bio_flags = 0; | 2645 | unsigned long bio_flags = 0; |
| 2083 | int ret; | 2646 | int ret; |
| 2084 | 2647 | ||
| 2085 | ret = __extent_read_full_page(tree, page, get_extent, &bio, 0, | 2648 | ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num, |
| 2086 | &bio_flags); | 2649 | &bio_flags); |
| 2087 | if (bio) | 2650 | if (bio) |
| 2088 | ret = submit_one_bio(READ, bio, 0, bio_flags); | 2651 | ret = submit_one_bio(READ, bio, mirror_num, bio_flags); |
| 2089 | return ret; | 2652 | return ret; |
| 2090 | } | 2653 | } |
| 2091 | 2654 | ||
| @@ -2136,6 +2699,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2136 | int compressed; | 2699 | int compressed; |
| 2137 | int write_flags; | 2700 | int write_flags; |
| 2138 | unsigned long nr_written = 0; | 2701 | unsigned long nr_written = 0; |
| 2702 | bool fill_delalloc = true; | ||
| 2139 | 2703 | ||
| 2140 | if (wbc->sync_mode == WB_SYNC_ALL) | 2704 | if (wbc->sync_mode == WB_SYNC_ALL) |
| 2141 | write_flags = WRITE_SYNC; | 2705 | write_flags = WRITE_SYNC; |
| @@ -2145,6 +2709,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2145 | trace___extent_writepage(page, inode, wbc); | 2709 | trace___extent_writepage(page, inode, wbc); |
| 2146 | 2710 | ||
| 2147 | WARN_ON(!PageLocked(page)); | 2711 | WARN_ON(!PageLocked(page)); |
| 2712 | |||
| 2713 | ClearPageError(page); | ||
| 2714 | |||
| 2148 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); | 2715 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); |
| 2149 | if (page->index > end_index || | 2716 | if (page->index > end_index || |
| 2150 | (page->index == end_index && !pg_offset)) { | 2717 | (page->index == end_index && !pg_offset)) { |
| @@ -2166,10 +2733,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2166 | 2733 | ||
| 2167 | set_page_extent_mapped(page); | 2734 | set_page_extent_mapped(page); |
| 2168 | 2735 | ||
| 2736 | if (!tree->ops || !tree->ops->fill_delalloc) | ||
| 2737 | fill_delalloc = false; | ||
| 2738 | |||
| 2169 | delalloc_start = start; | 2739 | delalloc_start = start; |
| 2170 | delalloc_end = 0; | 2740 | delalloc_end = 0; |
| 2171 | page_started = 0; | 2741 | page_started = 0; |
| 2172 | if (!epd->extent_locked) { | 2742 | if (!epd->extent_locked && fill_delalloc) { |
| 2173 | u64 delalloc_to_write = 0; | 2743 | u64 delalloc_to_write = 0; |
| 2174 | /* | 2744 | /* |
| 2175 | * make sure the wbc mapping index is at least updated | 2745 | * make sure the wbc mapping index is at least updated |
| @@ -2421,10 +2991,16 @@ retry: | |||
| 2421 | * swizzled back from swapper_space to tmpfs file | 2991 | * swizzled back from swapper_space to tmpfs file |
| 2422 | * mapping | 2992 | * mapping |
| 2423 | */ | 2993 | */ |
| 2424 | if (tree->ops && tree->ops->write_cache_pages_lock_hook) | 2994 | if (tree->ops && |
| 2425 | tree->ops->write_cache_pages_lock_hook(page); | 2995 | tree->ops->write_cache_pages_lock_hook) { |
| 2426 | else | 2996 | tree->ops->write_cache_pages_lock_hook(page, |
| 2427 | lock_page(page); | 2997 | data, flush_fn); |
| 2998 | } else { | ||
| 2999 | if (!trylock_page(page)) { | ||
| 3000 | flush_fn(data); | ||
| 3001 | lock_page(page); | ||
| 3002 | } | ||
| 3003 | } | ||
| 2428 | 3004 | ||
| 2429 | if (unlikely(page->mapping != mapping)) { | 3005 | if (unlikely(page->mapping != mapping)) { |
| 2430 | unlock_page(page); | 3006 | unlock_page(page); |
| @@ -2926,7 +3502,7 @@ out: | |||
| 2926 | return ret; | 3502 | return ret; |
| 2927 | } | 3503 | } |
| 2928 | 3504 | ||
| 2929 | static inline struct page *extent_buffer_page(struct extent_buffer *eb, | 3505 | inline struct page *extent_buffer_page(struct extent_buffer *eb, |
| 2930 | unsigned long i) | 3506 | unsigned long i) |
| 2931 | { | 3507 | { |
| 2932 | struct page *p; | 3508 | struct page *p; |
| @@ -2951,7 +3527,7 @@ static inline struct page *extent_buffer_page(struct extent_buffer *eb, | |||
| 2951 | return p; | 3527 | return p; |
| 2952 | } | 3528 | } |
| 2953 | 3529 | ||
| 2954 | static inline unsigned long num_extent_pages(u64 start, u64 len) | 3530 | inline unsigned long num_extent_pages(u64 start, u64 len) |
| 2955 | { | 3531 | { |
| 2956 | return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - | 3532 | return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - |
| 2957 | (start >> PAGE_CACHE_SHIFT); | 3533 | (start >> PAGE_CACHE_SHIFT); |
| @@ -3204,6 +3780,7 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, | |||
| 3204 | PAGECACHE_TAG_DIRTY); | 3780 | PAGECACHE_TAG_DIRTY); |
| 3205 | } | 3781 | } |
| 3206 | spin_unlock_irq(&page->mapping->tree_lock); | 3782 | spin_unlock_irq(&page->mapping->tree_lock); |
| 3783 | ClearPageError(page); | ||
| 3207 | unlock_page(page); | 3784 | unlock_page(page); |
| 3208 | } | 3785 | } |
| 3209 | return 0; | 3786 | return 0; |
| @@ -3349,8 +3926,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, | |||
| 3349 | } | 3926 | } |
| 3350 | 3927 | ||
| 3351 | int read_extent_buffer_pages(struct extent_io_tree *tree, | 3928 | int read_extent_buffer_pages(struct extent_io_tree *tree, |
| 3352 | struct extent_buffer *eb, | 3929 | struct extent_buffer *eb, u64 start, int wait, |
| 3353 | u64 start, int wait, | ||
| 3354 | get_extent_t *get_extent, int mirror_num) | 3930 | get_extent_t *get_extent, int mirror_num) |
| 3355 | { | 3931 | { |
| 3356 | unsigned long i; | 3932 | unsigned long i; |
| @@ -3386,7 +3962,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
| 3386 | num_pages = num_extent_pages(eb->start, eb->len); | 3962 | num_pages = num_extent_pages(eb->start, eb->len); |
| 3387 | for (i = start_i; i < num_pages; i++) { | 3963 | for (i = start_i; i < num_pages; i++) { |
| 3388 | page = extent_buffer_page(eb, i); | 3964 | page = extent_buffer_page(eb, i); |
| 3389 | if (!wait) { | 3965 | if (wait == WAIT_NONE) { |
| 3390 | if (!trylock_page(page)) | 3966 | if (!trylock_page(page)) |
| 3391 | goto unlock_exit; | 3967 | goto unlock_exit; |
| 3392 | } else { | 3968 | } else { |
| @@ -3430,7 +4006,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
| 3430 | if (bio) | 4006 | if (bio) |
| 3431 | submit_one_bio(READ, bio, mirror_num, bio_flags); | 4007 | submit_one_bio(READ, bio, mirror_num, bio_flags); |
| 3432 | 4008 | ||
| 3433 | if (ret || !wait) | 4009 | if (ret || wait != WAIT_COMPLETE) |
| 3434 | return ret; | 4010 | return ret; |
| 3435 | 4011 | ||
| 3436 | for (i = start_i; i < num_pages; i++) { | 4012 | for (i = start_i; i < num_pages; i++) { |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 7b2f0c3e7929..feb9be0e23bc 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
| @@ -17,6 +17,8 @@ | |||
| 17 | #define EXTENT_NODATASUM (1 << 10) | 17 | #define EXTENT_NODATASUM (1 << 10) |
| 18 | #define EXTENT_DO_ACCOUNTING (1 << 11) | 18 | #define EXTENT_DO_ACCOUNTING (1 << 11) |
| 19 | #define EXTENT_FIRST_DELALLOC (1 << 12) | 19 | #define EXTENT_FIRST_DELALLOC (1 << 12) |
| 20 | #define EXTENT_NEED_WAIT (1 << 13) | ||
| 21 | #define EXTENT_DAMAGED (1 << 14) | ||
| 20 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | 22 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) |
| 21 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) | 23 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) |
| 22 | 24 | ||
| @@ -32,6 +34,7 @@ | |||
| 32 | #define EXTENT_BUFFER_BLOCKING 1 | 34 | #define EXTENT_BUFFER_BLOCKING 1 |
| 33 | #define EXTENT_BUFFER_DIRTY 2 | 35 | #define EXTENT_BUFFER_DIRTY 2 |
| 34 | #define EXTENT_BUFFER_CORRUPT 3 | 36 | #define EXTENT_BUFFER_CORRUPT 3 |
| 37 | #define EXTENT_BUFFER_READAHEAD 4 /* this got triggered by readahead */ | ||
| 35 | 38 | ||
| 36 | /* these are flags for extent_clear_unlock_delalloc */ | 39 | /* these are flags for extent_clear_unlock_delalloc */ |
| 37 | #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 | 40 | #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 |
| @@ -67,7 +70,7 @@ struct extent_io_ops { | |||
| 67 | unsigned long bio_flags); | 70 | unsigned long bio_flags); |
| 68 | int (*readpage_io_hook)(struct page *page, u64 start, u64 end); | 71 | int (*readpage_io_hook)(struct page *page, u64 start, u64 end); |
| 69 | int (*readpage_io_failed_hook)(struct bio *bio, struct page *page, | 72 | int (*readpage_io_failed_hook)(struct bio *bio, struct page *page, |
| 70 | u64 start, u64 end, | 73 | u64 start, u64 end, u64 failed_mirror, |
| 71 | struct extent_state *state); | 74 | struct extent_state *state); |
| 72 | int (*writepage_io_failed_hook)(struct bio *bio, struct page *page, | 75 | int (*writepage_io_failed_hook)(struct bio *bio, struct page *page, |
| 73 | u64 start, u64 end, | 76 | u64 start, u64 end, |
| @@ -85,7 +88,8 @@ struct extent_io_ops { | |||
| 85 | struct extent_state *other); | 88 | struct extent_state *other); |
| 86 | void (*split_extent_hook)(struct inode *inode, | 89 | void (*split_extent_hook)(struct inode *inode, |
| 87 | struct extent_state *orig, u64 split); | 90 | struct extent_state *orig, u64 split); |
| 88 | int (*write_cache_pages_lock_hook)(struct page *page); | 91 | int (*write_cache_pages_lock_hook)(struct page *page, void *data, |
| 92 | void (*flush_fn)(void *)); | ||
| 89 | }; | 93 | }; |
| 90 | 94 | ||
| 91 | struct extent_io_tree { | 95 | struct extent_io_tree { |
| @@ -185,7 +189,7 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 185 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, | 189 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, |
| 186 | gfp_t mask); | 190 | gfp_t mask); |
| 187 | int extent_read_full_page(struct extent_io_tree *tree, struct page *page, | 191 | int extent_read_full_page(struct extent_io_tree *tree, struct page *page, |
| 188 | get_extent_t *get_extent); | 192 | get_extent_t *get_extent, int mirror_num); |
| 189 | int __init extent_io_init(void); | 193 | int __init extent_io_init(void); |
| 190 | void extent_io_exit(void); | 194 | void extent_io_exit(void); |
| 191 | 195 | ||
| @@ -214,6 +218,8 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 214 | gfp_t mask); | 218 | gfp_t mask); |
| 215 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | 219 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, |
| 216 | gfp_t mask); | 220 | gfp_t mask); |
| 221 | int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 222 | int bits, int clear_bits, gfp_t mask); | ||
| 217 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, | 223 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, |
| 218 | struct extent_state **cached_state, gfp_t mask); | 224 | struct extent_state **cached_state, gfp_t mask); |
| 219 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | 225 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, |
| @@ -248,9 +254,14 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | |||
| 248 | struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, | 254 | struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, |
| 249 | u64 start, unsigned long len); | 255 | u64 start, unsigned long len); |
| 250 | void free_extent_buffer(struct extent_buffer *eb); | 256 | void free_extent_buffer(struct extent_buffer *eb); |
| 257 | #define WAIT_NONE 0 | ||
| 258 | #define WAIT_COMPLETE 1 | ||
| 259 | #define WAIT_PAGE_LOCK 2 | ||
| 251 | int read_extent_buffer_pages(struct extent_io_tree *tree, | 260 | int read_extent_buffer_pages(struct extent_io_tree *tree, |
| 252 | struct extent_buffer *eb, u64 start, int wait, | 261 | struct extent_buffer *eb, u64 start, int wait, |
| 253 | get_extent_t *get_extent, int mirror_num); | 262 | get_extent_t *get_extent, int mirror_num); |
| 263 | unsigned long num_extent_pages(u64 start, u64 len); | ||
| 264 | struct page *extent_buffer_page(struct extent_buffer *eb, unsigned long i); | ||
| 254 | 265 | ||
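Editor's sketch: with the int wait argument now a tri-state, callers pick a mode explicitly. A hypothetical synchronous read (the get_extent callback and mirror number below are placeholders):

	ret = read_extent_buffer_pages(tree, eb, 0, WAIT_COMPLETE,
				       get_extent, mirror_num);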
| 255 | static inline void extent_buffer_get(struct extent_buffer *eb) | 266 | static inline void extent_buffer_get(struct extent_buffer *eb) |
| 256 | { | 267 | { |
| @@ -300,4 +311,10 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
| 300 | struct bio * | 311 | struct bio * |
| 301 | btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, | 312 | btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, |
| 302 | gfp_t gfp_flags); | 313 | gfp_t gfp_flags); |
| 314 | |||
| 315 | struct btrfs_mapping_tree; | ||
| 316 | |||
| 317 | int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, | ||
| 318 | u64 length, u64 logical, struct page *page, | ||
| 319 | int mirror_num); | ||
| 303 | #endif | 320 | #endif |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index a1cb7821becd..c7fb3a4247d3 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
| @@ -91,8 +91,7 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, | |||
| 91 | struct btrfs_csum_item *item; | 91 | struct btrfs_csum_item *item; |
| 92 | struct extent_buffer *leaf; | 92 | struct extent_buffer *leaf; |
| 93 | u64 csum_offset = 0; | 93 | u64 csum_offset = 0; |
| 94 | u16 csum_size = | 94 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); |
| 95 | btrfs_super_csum_size(&root->fs_info->super_copy); | ||
| 96 | int csums_in_item; | 95 | int csums_in_item; |
| 97 | 96 | ||
| 98 | file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; | 97 | file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; |
| @@ -162,8 +161,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, | |||
| 162 | u64 item_last_offset = 0; | 161 | u64 item_last_offset = 0; |
| 163 | u64 disk_bytenr; | 162 | u64 disk_bytenr; |
| 164 | u32 diff; | 163 | u32 diff; |
| 165 | u16 csum_size = | 164 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); |
| 166 | btrfs_super_csum_size(&root->fs_info->super_copy); | ||
| 167 | int ret; | 165 | int ret; |
| 168 | struct btrfs_path *path; | 166 | struct btrfs_path *path; |
| 169 | struct btrfs_csum_item *item = NULL; | 167 | struct btrfs_csum_item *item = NULL; |
| @@ -290,7 +288,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | |||
| 290 | int ret; | 288 | int ret; |
| 291 | size_t size; | 289 | size_t size; |
| 292 | u64 csum_end; | 290 | u64 csum_end; |
| 293 | u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); | 291 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); |
| 294 | 292 | ||
| 295 | path = btrfs_alloc_path(); | 293 | path = btrfs_alloc_path(); |
| 296 | if (!path) | 294 | if (!path) |
| @@ -492,8 +490,7 @@ static noinline int truncate_one_csum(struct btrfs_trans_handle *trans, | |||
| 492 | u64 bytenr, u64 len) | 490 | u64 bytenr, u64 len) |
| 493 | { | 491 | { |
| 494 | struct extent_buffer *leaf; | 492 | struct extent_buffer *leaf; |
| 495 | u16 csum_size = | 493 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); |
| 496 | btrfs_super_csum_size(&root->fs_info->super_copy); | ||
| 497 | u64 csum_end; | 494 | u64 csum_end; |
| 498 | u64 end_byte = bytenr + len; | 495 | u64 end_byte = bytenr + len; |
| 499 | u32 blocksize_bits = root->fs_info->sb->s_blocksize_bits; | 496 | u32 blocksize_bits = root->fs_info->sb->s_blocksize_bits; |
| @@ -549,8 +546,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, | |||
| 549 | u64 csum_end; | 546 | u64 csum_end; |
| 550 | struct extent_buffer *leaf; | 547 | struct extent_buffer *leaf; |
| 551 | int ret; | 548 | int ret; |
| 552 | u16 csum_size = | 549 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); |
| 553 | btrfs_super_csum_size(&root->fs_info->super_copy); | ||
| 554 | int blocksize_bits = root->fs_info->sb->s_blocksize_bits; | 550 | int blocksize_bits = root->fs_info->sb->s_blocksize_bits; |
| 555 | 551 | ||
| 556 | root = root->fs_info->csum_root; | 552 | root = root->fs_info->csum_root; |
| @@ -676,8 +672,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, | |||
| 676 | struct btrfs_sector_sum *sector_sum; | 672 | struct btrfs_sector_sum *sector_sum; |
| 677 | u32 nritems; | 673 | u32 nritems; |
| 678 | u32 ins_size; | 674 | u32 ins_size; |
| 679 | u16 csum_size = | 675 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); |
| 680 | btrfs_super_csum_size(&root->fs_info->super_copy); | ||
| 681 | 676 | ||
| 682 | path = btrfs_alloc_path(); | 677 | path = btrfs_alloc_path(); |
| 683 | if (!path) | 678 | if (!path) |
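Every file-item.c hunk above is the same mechanical edit: fs_info->super_copy has evidently changed from an embedded struct to a pointer, so btrfs_super_csum_size() now takes super_copy directly rather than &super_copy, and each two-line assignment folds onto one line. A minimal illustration of the refactor — the struct fields here are invented for the example:

#include <stdio.h>
#include <stdlib.h>

struct super_block_copy { unsigned short csum_size; };

/* Before: the copy was embedded and callers passed &fi->super_copy.
 * After: it is allocated separately and passed directly. */
struct fs_info {
	struct super_block_copy *super_copy;
};

static unsigned short csum_size_of(const struct super_block_copy *sb)
{
	return sb->csum_size;
}

int main(void)
{
	struct fs_info fi;

	fi.super_copy = malloc(sizeof(*fi.super_copy));
	if (!fi.super_copy)
		return 1;
	fi.super_copy->csum_size = 4;   /* e.g. a crc32c checksum */
	printf("%u\n", (unsigned)csum_size_of(fi.super_copy)); /* no & now */
	free(fi.super_copy);
	return 0;
}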
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1266f6e9cdb2..dafdfa059bf6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
| @@ -1069,6 +1069,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, | |||
| 1069 | int i; | 1069 | int i; |
| 1070 | unsigned long index = pos >> PAGE_CACHE_SHIFT; | 1070 | unsigned long index = pos >> PAGE_CACHE_SHIFT; |
| 1071 | struct inode *inode = fdentry(file)->d_inode; | 1071 | struct inode *inode = fdentry(file)->d_inode; |
| 1072 | gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); | ||
| 1072 | int err = 0; | 1073 | int err = 0; |
| 1073 | int faili = 0; | 1074 | int faili = 0; |
| 1074 | u64 start_pos; | 1075 | u64 start_pos; |
| @@ -1080,7 +1081,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, | |||
| 1080 | again: | 1081 | again: |
| 1081 | for (i = 0; i < num_pages; i++) { | 1082 | for (i = 0; i < num_pages; i++) { |
| 1082 | pages[i] = find_or_create_page(inode->i_mapping, index + i, | 1083 | pages[i] = find_or_create_page(inode->i_mapping, index + i, |
| 1083 | GFP_NOFS); | 1084 | mask); |
| 1084 | if (!pages[i]) { | 1085 | if (!pages[i]) { |
| 1085 | faili = i - 1; | 1086 | faili = i - 1; |
| 1086 | err = -ENOMEM; | 1087 | err = -ENOMEM; |
| @@ -1615,10 +1616,6 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
| 1615 | goto out; | 1616 | goto out; |
| 1616 | } | 1617 | } |
| 1617 | 1618 | ||
| 1618 | ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); | ||
| 1619 | if (ret) | ||
| 1620 | goto out; | ||
| 1621 | |||
| 1622 | locked_end = alloc_end - 1; | 1619 | locked_end = alloc_end - 1; |
| 1623 | while (1) { | 1620 | while (1) { |
| 1624 | struct btrfs_ordered_extent *ordered; | 1621 | struct btrfs_ordered_extent *ordered; |
| @@ -1664,11 +1661,27 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
| 1664 | if (em->block_start == EXTENT_MAP_HOLE || | 1661 | if (em->block_start == EXTENT_MAP_HOLE || |
| 1665 | (cur_offset >= inode->i_size && | 1662 | (cur_offset >= inode->i_size && |
| 1666 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | 1663 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { |
| 1664 | |||
| 1665 | /* | ||
| 1666 | * Make sure we have enough space before we do the | ||
| 1667 | * allocation. | ||
| 1668 | */ | ||
| 1669 | ret = btrfs_check_data_free_space(inode, last_byte - | ||
| 1670 | cur_offset); | ||
| 1671 | if (ret) { | ||
| 1672 | free_extent_map(em); | ||
| 1673 | break; | ||
| 1674 | } | ||
| 1675 | |||
| 1667 | ret = btrfs_prealloc_file_range(inode, mode, cur_offset, | 1676 | ret = btrfs_prealloc_file_range(inode, mode, cur_offset, |
| 1668 | last_byte - cur_offset, | 1677 | last_byte - cur_offset, |
| 1669 | 1 << inode->i_blkbits, | 1678 | 1 << inode->i_blkbits, |
| 1670 | offset + len, | 1679 | offset + len, |
| 1671 | &alloc_hint); | 1680 | &alloc_hint); |
| 1681 | |||
| 1682 | /* Let go of our reservation. */ | ||
| 1683 | btrfs_free_reserved_data_space(inode, last_byte - | ||
| 1684 | cur_offset); | ||
| 1672 | if (ret < 0) { | 1685 | if (ret < 0) { |
| 1673 | free_extent_map(em); | 1686 | free_extent_map(em); |
| 1674 | break; | 1687 | break; |
| @@ -1694,8 +1707,6 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
| 1694 | } | 1707 | } |
| 1695 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, | 1708 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, |
| 1696 | &cached_state, GFP_NOFS); | 1709 | &cached_state, GFP_NOFS); |
| 1697 | |||
| 1698 | btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); | ||
| 1699 | out: | 1710 | out: |
| 1700 | mutex_unlock(&inode->i_mutex); | 1711 | mutex_unlock(&inode->i_mutex); |
| 1701 | return ret; | 1712 | return ret; |
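Two things change in file.c: prepare_pages() now allocates pages with a mask from btrfs_alloc_write_mask() instead of raw GFP_NOFS, and btrfs_fallocate() stops reserving data space for the whole requested range up front. Instead it reserves per hole it actually preallocates and releases the reservation as soon as btrfs_prealloc_file_range() returns. The reservation flow, sketched with placeholder functions (the extent-map lookup that bounds each chunk is elided):

static int reserve_data(long long bytes)  { (void)bytes; return 0; }
static void release_data(long long bytes) { (void)bytes; }
static int prealloc_range(long long off, long long len)
{ (void)off; (void)len; return 0; }

static int fallocate_loop(long long start, long long end)
{
	long long cur = start;

	while (cur < end) {
		long long last = end;   /* real code: end of current hole */
		int ret;

		/* Reserve only what this chunk needs... */
		ret = reserve_data(last - cur);
		if (ret)
			return ret;
		ret = prealloc_range(cur, last - cur);
		/* ...and let go immediately; the allocation itself
		 * now accounts for the space. */
		release_data(last - cur);
		if (ret < 0)
			return ret;
		cur = last;
	}
	return 0;
}

int main(void) { return fallocate_loop(0, 1 << 20); }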
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 41ac927401d0..7a15fcfb3e1f 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
| 21 | #include <linux/slab.h> | 21 | #include <linux/slab.h> |
| 22 | #include <linux/math64.h> | 22 | #include <linux/math64.h> |
| 23 | #include <linux/ratelimit.h> | ||
| 23 | #include "ctree.h" | 24 | #include "ctree.h" |
| 24 | #include "free-space-cache.h" | 25 | #include "free-space-cache.h" |
| 25 | #include "transaction.h" | 26 | #include "transaction.h" |
| @@ -84,6 +85,7 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, | |||
| 84 | *block_group, struct btrfs_path *path) | 85 | *block_group, struct btrfs_path *path) |
| 85 | { | 86 | { |
| 86 | struct inode *inode = NULL; | 87 | struct inode *inode = NULL; |
| 88 | u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; | ||
| 87 | 89 | ||
| 88 | spin_lock(&block_group->lock); | 90 | spin_lock(&block_group->lock); |
| 89 | if (block_group->inode) | 91 | if (block_group->inode) |
| @@ -98,13 +100,14 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, | |||
| 98 | return inode; | 100 | return inode; |
| 99 | 101 | ||
| 100 | spin_lock(&block_group->lock); | 102 | spin_lock(&block_group->lock); |
| 101 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) { | 103 | if (!((BTRFS_I(inode)->flags & flags) == flags)) { |
| 102 | printk(KERN_INFO "Old style space inode found, converting.\n"); | 104 | printk(KERN_INFO "Old style space inode found, converting.\n"); |
| 103 | BTRFS_I(inode)->flags &= ~BTRFS_INODE_NODATASUM; | 105 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM | |
| 106 | BTRFS_INODE_NODATACOW; | ||
| 104 | block_group->disk_cache_state = BTRFS_DC_CLEAR; | 107 | block_group->disk_cache_state = BTRFS_DC_CLEAR; |
| 105 | } | 108 | } |
| 106 | 109 | ||
| 107 | if (!btrfs_fs_closing(root->fs_info)) { | 110 | if (!block_group->iref) { |
| 108 | block_group->inode = igrab(inode); | 111 | block_group->inode = igrab(inode); |
| 109 | block_group->iref = 1; | 112 | block_group->iref = 1; |
| 110 | } | 113 | } |
| @@ -122,12 +125,17 @@ int __create_free_space_inode(struct btrfs_root *root, | |||
| 122 | struct btrfs_free_space_header *header; | 125 | struct btrfs_free_space_header *header; |
| 123 | struct btrfs_inode_item *inode_item; | 126 | struct btrfs_inode_item *inode_item; |
| 124 | struct extent_buffer *leaf; | 127 | struct extent_buffer *leaf; |
| 128 | u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC; | ||
| 125 | int ret; | 129 | int ret; |
| 126 | 130 | ||
| 127 | ret = btrfs_insert_empty_inode(trans, root, path, ino); | 131 | ret = btrfs_insert_empty_inode(trans, root, path, ino); |
| 128 | if (ret) | 132 | if (ret) |
| 129 | return ret; | 133 | return ret; |
| 130 | 134 | ||
| 135 | /* We inline crc's for the free disk space cache */ | ||
| 136 | if (ino != BTRFS_FREE_INO_OBJECTID) | ||
| 137 | flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; | ||
| 138 | |||
| 131 | leaf = path->nodes[0]; | 139 | leaf = path->nodes[0]; |
| 132 | inode_item = btrfs_item_ptr(leaf, path->slots[0], | 140 | inode_item = btrfs_item_ptr(leaf, path->slots[0], |
| 133 | struct btrfs_inode_item); | 141 | struct btrfs_inode_item); |
| @@ -140,8 +148,7 @@ int __create_free_space_inode(struct btrfs_root *root, | |||
| 140 | btrfs_set_inode_uid(leaf, inode_item, 0); | 148 | btrfs_set_inode_uid(leaf, inode_item, 0); |
| 141 | btrfs_set_inode_gid(leaf, inode_item, 0); | 149 | btrfs_set_inode_gid(leaf, inode_item, 0); |
| 142 | btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); | 150 | btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); |
| 143 | btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS | | 151 | btrfs_set_inode_flags(leaf, inode_item, flags); |
| 144 | BTRFS_INODE_PREALLOC); | ||
| 145 | btrfs_set_inode_nlink(leaf, inode_item, 1); | 152 | btrfs_set_inode_nlink(leaf, inode_item, 1); |
| 146 | btrfs_set_inode_transid(leaf, inode_item, trans->transid); | 153 | btrfs_set_inode_transid(leaf, inode_item, trans->transid); |
| 147 | btrfs_set_inode_block_group(leaf, inode_item, offset); | 154 | btrfs_set_inode_block_group(leaf, inode_item, offset); |
| @@ -191,16 +198,24 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, | |||
| 191 | struct inode *inode) | 198 | struct inode *inode) |
| 192 | { | 199 | { |
| 193 | struct btrfs_block_rsv *rsv; | 200 | struct btrfs_block_rsv *rsv; |
| 201 | u64 needed_bytes; | ||
| 194 | loff_t oldsize; | 202 | loff_t oldsize; |
| 195 | int ret = 0; | 203 | int ret = 0; |
| 196 | 204 | ||
| 197 | rsv = trans->block_rsv; | 205 | rsv = trans->block_rsv; |
| 198 | trans->block_rsv = root->orphan_block_rsv; | 206 | trans->block_rsv = &root->fs_info->global_block_rsv; |
| 199 | ret = btrfs_block_rsv_check(trans, root, | 207 | |
| 200 | root->orphan_block_rsv, | 208 | /* 1 for slack space, 1 for updating the inode */ |
| 201 | 0, 5); | 209 | needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) + |
| 202 | if (ret) | 210 | btrfs_calc_trans_metadata_size(root, 1); |
| 203 | return ret; | 211 | |
| 212 | spin_lock(&trans->block_rsv->lock); | ||
| 213 | if (trans->block_rsv->reserved < needed_bytes) { | ||
| 214 | spin_unlock(&trans->block_rsv->lock); | ||
| 215 | trans->block_rsv = rsv; | ||
| 216 | return -ENOSPC; | ||
| 217 | } | ||
| 218 | spin_unlock(&trans->block_rsv->lock); | ||
| 204 | 219 | ||
| 205 | oldsize = i_size_read(inode); | 220 | oldsize = i_size_read(inode); |
| 206 | btrfs_i_size_write(inode, 0); | 221 | btrfs_i_size_write(inode, 0); |
| @@ -213,13 +228,15 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, | |||
| 213 | ret = btrfs_truncate_inode_items(trans, root, inode, | 228 | ret = btrfs_truncate_inode_items(trans, root, inode, |
| 214 | 0, BTRFS_EXTENT_DATA_KEY); | 229 | 0, BTRFS_EXTENT_DATA_KEY); |
| 215 | 230 | ||
| 216 | trans->block_rsv = rsv; | ||
| 217 | if (ret) { | 231 | if (ret) { |
| 232 | trans->block_rsv = rsv; | ||
| 218 | WARN_ON(1); | 233 | WARN_ON(1); |
| 219 | return ret; | 234 | return ret; |
| 220 | } | 235 | } |
| 221 | 236 | ||
| 222 | ret = btrfs_update_inode(trans, root, inode); | 237 | ret = btrfs_update_inode(trans, root, inode); |
| 238 | trans->block_rsv = rsv; | ||
| 239 | |||
| 223 | return ret; | 240 | return ret; |
| 224 | } | 241 | } |
| 225 | 242 | ||
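btrfs_truncate_free_space_cache() now computes its own requirement — one unit of slack for the truncate plus one for the inode update — and verifies the global block reserve can cover it under the reserve's spinlock, returning -ENOSPC up front instead of leaning on the orphan reserve. The shape of the check, modeled in userspace with a mutex standing in for the spinlock:

#include <pthread.h>
#include <errno.h>

struct block_rsv {
	pthread_mutex_t lock;
	unsigned long long reserved;
};

/* Fail fast, under the lock, if the reserve cannot cover the
 * truncation; mirrors the new -ENOSPC path above. */
static int check_rsv(struct block_rsv *rsv, unsigned long long needed)
{
	int ret = 0;

	pthread_mutex_lock(&rsv->lock);
	if (rsv->reserved < needed)
		ret = -ENOSPC;
	pthread_mutex_unlock(&rsv->lock);
	return ret;
}

int main(void)
{
	struct block_rsv rsv = { PTHREAD_MUTEX_INITIALIZER, 1 << 20 };

	return check_rsv(&rsv, 4096) ? 1 : 0;
}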
| @@ -242,26 +259,342 @@ static int readahead_cache(struct inode *inode) | |||
| 242 | return 0; | 259 | return 0; |
| 243 | } | 260 | } |
| 244 | 261 | ||
| 262 | struct io_ctl { | ||
| 263 | void *cur, *orig; | ||
| 264 | struct page *page; | ||
| 265 | struct page **pages; | ||
| 266 | struct btrfs_root *root; | ||
| 267 | unsigned long size; | ||
| 268 | int index; | ||
| 269 | int num_pages; | ||
| 270 | unsigned check_crcs:1; | ||
| 271 | }; | ||
| 272 | |||
| 273 | static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode, | ||
| 274 | struct btrfs_root *root) | ||
| 275 | { | ||
| 276 | memset(io_ctl, 0, sizeof(struct io_ctl)); | ||
| 277 | io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> | ||
| 278 | PAGE_CACHE_SHIFT; | ||
| 279 | io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages, | ||
| 280 | GFP_NOFS); | ||
| 281 | if (!io_ctl->pages) | ||
| 282 | return -ENOMEM; | ||
| 283 | io_ctl->root = root; | ||
| 284 | if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID) | ||
| 285 | io_ctl->check_crcs = 1; | ||
| 286 | return 0; | ||
| 287 | } | ||
| 288 | |||
| 289 | static void io_ctl_free(struct io_ctl *io_ctl) | ||
| 290 | { | ||
| 291 | kfree(io_ctl->pages); | ||
| 292 | } | ||
| 293 | |||
| 294 | static void io_ctl_unmap_page(struct io_ctl *io_ctl) | ||
| 295 | { | ||
| 296 | if (io_ctl->cur) { | ||
| 297 | kunmap(io_ctl->page); | ||
| 298 | io_ctl->cur = NULL; | ||
| 299 | io_ctl->orig = NULL; | ||
| 300 | } | ||
| 301 | } | ||
| 302 | |||
| 303 | static void io_ctl_map_page(struct io_ctl *io_ctl, int clear) | ||
| 304 | { | ||
| 305 | WARN_ON(io_ctl->cur); | ||
| 306 | BUG_ON(io_ctl->index >= io_ctl->num_pages); | ||
| 307 | io_ctl->page = io_ctl->pages[io_ctl->index++]; | ||
| 308 | io_ctl->cur = kmap(io_ctl->page); | ||
| 309 | io_ctl->orig = io_ctl->cur; | ||
| 310 | io_ctl->size = PAGE_CACHE_SIZE; | ||
| 311 | if (clear) | ||
| 312 | memset(io_ctl->cur, 0, PAGE_CACHE_SIZE); | ||
| 313 | } | ||
| 314 | |||
| 315 | static void io_ctl_drop_pages(struct io_ctl *io_ctl) | ||
| 316 | { | ||
| 317 | int i; | ||
| 318 | |||
| 319 | io_ctl_unmap_page(io_ctl); | ||
| 320 | |||
| 321 | for (i = 0; i < io_ctl->num_pages; i++) { | ||
| 322 | ClearPageChecked(io_ctl->pages[i]); | ||
| 323 | unlock_page(io_ctl->pages[i]); | ||
| 324 | page_cache_release(io_ctl->pages[i]); | ||
| 325 | } | ||
| 326 | } | ||
| 327 | |||
| 328 | static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode, | ||
| 329 | int uptodate) | ||
| 330 | { | ||
| 331 | struct page *page; | ||
| 332 | gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); | ||
| 333 | int i; | ||
| 334 | |||
| 335 | for (i = 0; i < io_ctl->num_pages; i++) { | ||
| 336 | page = find_or_create_page(inode->i_mapping, i, mask); | ||
| 337 | if (!page) { | ||
| 338 | io_ctl_drop_pages(io_ctl); | ||
| 339 | return -ENOMEM; | ||
| 340 | } | ||
| 341 | io_ctl->pages[i] = page; | ||
| 342 | if (uptodate && !PageUptodate(page)) { | ||
| 343 | btrfs_readpage(NULL, page); | ||
| 344 | lock_page(page); | ||
| 345 | if (!PageUptodate(page)) { | ||
| 346 | printk(KERN_ERR "btrfs: error reading free " | ||
| 347 | "space cache\n"); | ||
| 348 | io_ctl_drop_pages(io_ctl); | ||
| 349 | return -EIO; | ||
| 350 | } | ||
| 351 | } | ||
| 352 | } | ||
| 353 | |||
| 354 | return 0; | ||
| 355 | } | ||
| 356 | |||
| 357 | static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation) | ||
| 358 | { | ||
| 359 | u64 *val; | ||
| 360 | |||
| 361 | io_ctl_map_page(io_ctl, 1); | ||
| 362 | |||
| 363 | /* | ||
| 364 | * Skip the csum areas. If we don't check crcs then we just have a | ||
| 365 | * 64bit chunk at the front of the first page. | ||
| 366 | */ | ||
| 367 | if (io_ctl->check_crcs) { | ||
| 368 | io_ctl->cur += (sizeof(u32) * io_ctl->num_pages); | ||
| 369 | io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); | ||
| 370 | } else { | ||
| 371 | io_ctl->cur += sizeof(u64); | ||
| 372 | io_ctl->size -= sizeof(u64) * 2; | ||
| 373 | } | ||
| 374 | |||
| 375 | val = io_ctl->cur; | ||
| 376 | *val = cpu_to_le64(generation); | ||
| 377 | io_ctl->cur += sizeof(u64); | ||
| 378 | } | ||
| 379 | |||
| 380 | static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation) | ||
| 381 | { | ||
| 382 | u64 *gen; | ||
| 383 | |||
| 384 | /* | ||
| 385 | * Skip the crc area. If we don't check crcs then we just have a 64bit | ||
| 386 | * chunk at the front of the first page. | ||
| 387 | */ | ||
| 388 | if (io_ctl->check_crcs) { | ||
| 389 | io_ctl->cur += sizeof(u32) * io_ctl->num_pages; | ||
| 390 | io_ctl->size -= sizeof(u64) + | ||
| 391 | (sizeof(u32) * io_ctl->num_pages); | ||
| 392 | } else { | ||
| 393 | io_ctl->cur += sizeof(u64); | ||
| 394 | io_ctl->size -= sizeof(u64) * 2; | ||
| 395 | } | ||
| 396 | |||
| 397 | gen = io_ctl->cur; | ||
| 398 | if (le64_to_cpu(*gen) != generation) { | ||
| 399 | printk_ratelimited(KERN_ERR "btrfs: space cache generation " | ||
| 400 | "(%Lu) does not match inode (%Lu)\n", *gen, | ||
| 401 | generation); | ||
| 402 | io_ctl_unmap_page(io_ctl); | ||
| 403 | return -EIO; | ||
| 404 | } | ||
| 405 | io_ctl->cur += sizeof(u64); | ||
| 406 | return 0; | ||
| 407 | } | ||
| 408 | |||
| 409 | static void io_ctl_set_crc(struct io_ctl *io_ctl, int index) | ||
| 410 | { | ||
| 411 | u32 *tmp; | ||
| 412 | u32 crc = ~(u32)0; | ||
| 413 | unsigned offset = 0; | ||
| 414 | |||
| 415 | if (!io_ctl->check_crcs) { | ||
| 416 | io_ctl_unmap_page(io_ctl); | ||
| 417 | return; | ||
| 418 | } | ||
| 419 | |||
| 420 | if (index == 0) | ||
| 422 | |||
| 423 | crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc, | ||
| 424 | PAGE_CACHE_SIZE - offset); | ||
| 425 | btrfs_csum_final(crc, (char *)&crc); | ||
| 426 | io_ctl_unmap_page(io_ctl); | ||
| 427 | tmp = kmap(io_ctl->pages[0]); | ||
| 428 | tmp += index; | ||
| 429 | *tmp = crc; | ||
| 430 | kunmap(io_ctl->pages[0]); | ||
| 431 | } | ||
| 432 | |||
| 433 | static int io_ctl_check_crc(struct io_ctl *io_ctl, int index) | ||
| 434 | { | ||
| 435 | u32 *tmp, val; | ||
| 436 | u32 crc = ~(u32)0; | ||
| 437 | unsigned offset = 0; | ||
| 438 | |||
| 439 | if (!io_ctl->check_crcs) { | ||
| 440 | io_ctl_map_page(io_ctl, 0); | ||
| 441 | return 0; | ||
| 442 | } | ||
| 443 | |||
| 444 | if (index == 0) | ||
| 445 | offset = sizeof(u32) * io_ctl->num_pages; | ||
| 446 | |||
| 447 | tmp = kmap(io_ctl->pages[0]); | ||
| 448 | tmp += index; | ||
| 449 | val = *tmp; | ||
| 450 | kunmap(io_ctl->pages[0]); | ||
| 451 | |||
| 452 | io_ctl_map_page(io_ctl, 0); | ||
| 453 | crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc, | ||
| 454 | PAGE_CACHE_SIZE - offset); | ||
| 455 | btrfs_csum_final(crc, (char *)&crc); | ||
| 456 | if (val != crc) { | ||
| 457 | printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free " | ||
| 458 | "space cache\n"); | ||
| 459 | io_ctl_unmap_page(io_ctl); | ||
| 460 | return -EIO; | ||
| 461 | } | ||
| 462 | |||
| 463 | return 0; | ||
| 464 | } | ||
| 465 | |||
| 466 | static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes, | ||
| 467 | void *bitmap) | ||
| 468 | { | ||
| 469 | struct btrfs_free_space_entry *entry; | ||
| 470 | |||
| 471 | if (!io_ctl->cur) | ||
| 472 | return -ENOSPC; | ||
| 473 | |||
| 474 | entry = io_ctl->cur; | ||
| 475 | entry->offset = cpu_to_le64(offset); | ||
| 476 | entry->bytes = cpu_to_le64(bytes); | ||
| 477 | entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP : | ||
| 478 | BTRFS_FREE_SPACE_EXTENT; | ||
| 479 | io_ctl->cur += sizeof(struct btrfs_free_space_entry); | ||
| 480 | io_ctl->size -= sizeof(struct btrfs_free_space_entry); | ||
| 481 | |||
| 482 | if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) | ||
| 483 | return 0; | ||
| 484 | |||
| 485 | io_ctl_set_crc(io_ctl, io_ctl->index - 1); | ||
| 486 | |||
| 487 | /* No more pages to map */ | ||
| 488 | if (io_ctl->index >= io_ctl->num_pages) | ||
| 489 | return 0; | ||
| 490 | |||
| 491 | /* map the next page */ | ||
| 492 | io_ctl_map_page(io_ctl, 1); | ||
| 493 | return 0; | ||
| 494 | } | ||
| 495 | |||
| 496 | static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap) | ||
| 497 | { | ||
| 498 | if (!io_ctl->cur) | ||
| 499 | return -ENOSPC; | ||
| 500 | |||
| 501 | /* | ||
| 502 | * If we aren't at the start of the current page, unmap this one and | ||
| 503 | * map the next one if there is any left. | ||
| 504 | */ | ||
| 505 | if (io_ctl->cur != io_ctl->orig) { | ||
| 506 | io_ctl_set_crc(io_ctl, io_ctl->index - 1); | ||
| 507 | if (io_ctl->index >= io_ctl->num_pages) | ||
| 508 | return -ENOSPC; | ||
| 509 | io_ctl_map_page(io_ctl, 0); | ||
| 510 | } | ||
| 511 | |||
| 512 | memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE); | ||
| 513 | io_ctl_set_crc(io_ctl, io_ctl->index - 1); | ||
| 514 | if (io_ctl->index < io_ctl->num_pages) | ||
| 515 | io_ctl_map_page(io_ctl, 0); | ||
| 516 | return 0; | ||
| 517 | } | ||
| 518 | |||
| 519 | static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl) | ||
| 520 | { | ||
| 521 | /* | ||
| 522 | * If we're not on the boundary we know we've modified the page and we | ||
| 523 | * need to crc the page. | ||
| 524 | */ | ||
| 525 | if (io_ctl->cur != io_ctl->orig) | ||
| 526 | io_ctl_set_crc(io_ctl, io_ctl->index - 1); | ||
| 527 | else | ||
| 528 | io_ctl_unmap_page(io_ctl); | ||
| 529 | |||
| 530 | while (io_ctl->index < io_ctl->num_pages) { | ||
| 531 | io_ctl_map_page(io_ctl, 1); | ||
| 532 | io_ctl_set_crc(io_ctl, io_ctl->index - 1); | ||
| 533 | } | ||
| 534 | } | ||
| 535 | |||
| 536 | static int io_ctl_read_entry(struct io_ctl *io_ctl, | ||
| 537 | struct btrfs_free_space *entry, u8 *type) | ||
| 538 | { | ||
| 539 | struct btrfs_free_space_entry *e; | ||
| 540 | |||
| 541 | e = io_ctl->cur; | ||
| 542 | entry->offset = le64_to_cpu(e->offset); | ||
| 543 | entry->bytes = le64_to_cpu(e->bytes); | ||
| 544 | *type = e->type; | ||
| 545 | io_ctl->cur += sizeof(struct btrfs_free_space_entry); | ||
| 546 | io_ctl->size -= sizeof(struct btrfs_free_space_entry); | ||
| 547 | |||
| 548 | if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) | ||
| 549 | return 0; | ||
| 550 | |||
| 551 | io_ctl_unmap_page(io_ctl); | ||
| 552 | |||
| 553 | if (io_ctl->index >= io_ctl->num_pages) | ||
| 554 | return 0; | ||
| 555 | |||
| 556 | return io_ctl_check_crc(io_ctl, io_ctl->index); | ||
| 557 | } | ||
| 558 | |||
| 559 | static int io_ctl_read_bitmap(struct io_ctl *io_ctl, | ||
| 560 | struct btrfs_free_space *entry) | ||
| 561 | { | ||
| 562 | int ret; | ||
| 563 | |||
| 564 | if (io_ctl->cur && io_ctl->cur != io_ctl->orig) | ||
| 565 | io_ctl_unmap_page(io_ctl); | ||
| 566 | |||
| 567 | ret = io_ctl_check_crc(io_ctl, io_ctl->index); | ||
| 568 | if (ret) | ||
| 569 | return ret; | ||
| 570 | |||
| 571 | memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE); | ||
| 572 | io_ctl_unmap_page(io_ctl); | ||
| 573 | |||
| 574 | return 0; | ||
| 575 | } | ||
| 576 | |||
| 245 | int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | 577 | int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, |
| 246 | struct btrfs_free_space_ctl *ctl, | 578 | struct btrfs_free_space_ctl *ctl, |
| 247 | struct btrfs_path *path, u64 offset) | 579 | struct btrfs_path *path, u64 offset) |
| 248 | { | 580 | { |
| 249 | struct btrfs_free_space_header *header; | 581 | struct btrfs_free_space_header *header; |
| 250 | struct extent_buffer *leaf; | 582 | struct extent_buffer *leaf; |
| 251 | struct page *page; | 583 | struct io_ctl io_ctl; |
| 252 | struct btrfs_key key; | 584 | struct btrfs_key key; |
| 585 | struct btrfs_free_space *e, *n; | ||
| 253 | struct list_head bitmaps; | 586 | struct list_head bitmaps; |
| 254 | u64 num_entries; | 587 | u64 num_entries; |
| 255 | u64 num_bitmaps; | 588 | u64 num_bitmaps; |
| 256 | u64 generation; | 589 | u64 generation; |
| 257 | pgoff_t index = 0; | 590 | u8 type; |
| 258 | int ret = 0; | 591 | int ret = 0; |
| 259 | 592 | ||
| 260 | INIT_LIST_HEAD(&bitmaps); | 593 | INIT_LIST_HEAD(&bitmaps); |
| 261 | 594 | ||
| 262 | /* Nothing in the space cache, goodbye */ | 595 | /* Nothing in the space cache, goodbye */ |
| 263 | if (!i_size_read(inode)) | 596 | if (!i_size_read(inode)) |
| 264 | goto out; | 597 | return 0; |
| 265 | 598 | ||
| 266 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | 599 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; |
| 267 | key.offset = offset; | 600 | key.offset = offset; |
| @@ -269,11 +602,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
| 269 | 602 | ||
| 270 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 603 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
| 271 | if (ret < 0) | 604 | if (ret < 0) |
| 272 | goto out; | 605 | return 0; |
| 273 | else if (ret > 0) { | 606 | else if (ret > 0) { |
| 274 | btrfs_release_path(path); | 607 | btrfs_release_path(path); |
| 275 | ret = 0; | 608 | return 0; |
| 276 | goto out; | ||
| 277 | } | 609 | } |
| 278 | 610 | ||
| 279 | ret = -1; | 611 | ret = -1; |
| @@ -291,169 +623,100 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
| 291 | " not match free space cache generation (%llu)\n", | 623 | " not match free space cache generation (%llu)\n", |
| 292 | (unsigned long long)BTRFS_I(inode)->generation, | 624 | (unsigned long long)BTRFS_I(inode)->generation, |
| 293 | (unsigned long long)generation); | 625 | (unsigned long long)generation); |
| 294 | goto out; | 626 | return 0; |
| 295 | } | 627 | } |
| 296 | 628 | ||
| 297 | if (!num_entries) | 629 | if (!num_entries) |
| 298 | goto out; | 630 | return 0; |
| 299 | 631 | ||
| 632 | io_ctl_init(&io_ctl, inode, root); | ||
| 300 | ret = readahead_cache(inode); | 633 | ret = readahead_cache(inode); |
| 301 | if (ret) | 634 | if (ret) |
| 302 | goto out; | 635 | goto out; |
| 303 | 636 | ||
| 304 | while (1) { | 637 | ret = io_ctl_prepare_pages(&io_ctl, inode, 1); |
| 305 | struct btrfs_free_space_entry *entry; | 638 | if (ret) |
| 306 | struct btrfs_free_space *e; | 639 | goto out; |
| 307 | void *addr; | ||
| 308 | unsigned long offset = 0; | ||
| 309 | int need_loop = 0; | ||
| 310 | 640 | ||
| 311 | if (!num_entries && !num_bitmaps) | 641 | ret = io_ctl_check_crc(&io_ctl, 0); |
| 312 | break; | 642 | if (ret) |
| 643 | goto free_cache; | ||
| 313 | 644 | ||
| 314 | page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); | 645 | ret = io_ctl_check_generation(&io_ctl, generation); |
| 315 | if (!page) | 646 | if (ret) |
| 647 | goto free_cache; | ||
| 648 | |||
| 649 | while (num_entries) { | ||
| 650 | e = kmem_cache_zalloc(btrfs_free_space_cachep, | ||
| 651 | GFP_NOFS); | ||
| 652 | if (!e) | ||
| 316 | goto free_cache; | 653 | goto free_cache; |
| 317 | 654 | ||
| 318 | if (!PageUptodate(page)) { | 655 | ret = io_ctl_read_entry(&io_ctl, e, &type); |
| 319 | btrfs_readpage(NULL, page); | 656 | if (ret) { |
| 320 | lock_page(page); | 657 | kmem_cache_free(btrfs_free_space_cachep, e); |
| 321 | if (!PageUptodate(page)) { | 658 | goto free_cache; |
| 322 | unlock_page(page); | ||
| 323 | page_cache_release(page); | ||
| 324 | printk(KERN_ERR "btrfs: error reading free " | ||
| 325 | "space cache\n"); | ||
| 326 | goto free_cache; | ||
| 327 | } | ||
| 328 | } | 659 | } |
| 329 | addr = kmap(page); | ||
| 330 | 660 | ||
| 331 | if (index == 0) { | 661 | if (!e->bytes) { |
| 332 | u64 *gen; | 662 | kmem_cache_free(btrfs_free_space_cachep, e); |
| 663 | goto free_cache; | ||
| 664 | } | ||
| 333 | 665 | ||
| 334 | /* | 666 | if (type == BTRFS_FREE_SPACE_EXTENT) { |
| 335 | * We put a bogus crc in the front of the first page in | 667 | spin_lock(&ctl->tree_lock); |
| 336 | * case old kernels try to mount a fs with the new | 668 | ret = link_free_space(ctl, e); |
| 337 | * format to make sure they discard the cache. | 669 | spin_unlock(&ctl->tree_lock); |
| 338 | */ | 670 | if (ret) { |
| 339 | addr += sizeof(u64); | 671 | printk(KERN_ERR "Duplicate entries in " |
| 340 | offset += sizeof(u64); | 672 | "free space cache, dumping\n"); |
| 341 | 673 | kmem_cache_free(btrfs_free_space_cachep, e); | |
| 342 | gen = addr; | ||
| 343 | if (*gen != BTRFS_I(inode)->generation) { | ||
| 344 | printk(KERN_ERR "btrfs: space cache generation" | ||
| 345 | " (%llu) does not match inode (%llu)\n", | ||
| 346 | (unsigned long long)*gen, | ||
| 347 | (unsigned long long) | ||
| 348 | BTRFS_I(inode)->generation); | ||
| 349 | kunmap(page); | ||
| 350 | unlock_page(page); | ||
| 351 | page_cache_release(page); | ||
| 352 | goto free_cache; | 674 | goto free_cache; |
| 353 | } | 675 | } |
| 354 | addr += sizeof(u64); | 676 | } else { |
| 355 | offset += sizeof(u64); | 677 | BUG_ON(!num_bitmaps); |
| 356 | } | 678 | num_bitmaps--; |
| 357 | entry = addr; | 679 | e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); |
| 358 | 680 | if (!e->bitmap) { | |
| 359 | while (1) { | 681 | kmem_cache_free( |
| 360 | if (!num_entries) | 682 | btrfs_free_space_cachep, e); |
| 361 | break; | ||
| 362 | |||
| 363 | need_loop = 1; | ||
| 364 | e = kmem_cache_zalloc(btrfs_free_space_cachep, | ||
| 365 | GFP_NOFS); | ||
| 366 | if (!e) { | ||
| 367 | kunmap(page); | ||
| 368 | unlock_page(page); | ||
| 369 | page_cache_release(page); | ||
| 370 | goto free_cache; | 683 | goto free_cache; |
| 371 | } | 684 | } |
| 372 | 685 | spin_lock(&ctl->tree_lock); | |
| 373 | e->offset = le64_to_cpu(entry->offset); | 686 | ret = link_free_space(ctl, e); |
| 374 | e->bytes = le64_to_cpu(entry->bytes); | 687 | ctl->total_bitmaps++; |
| 375 | if (!e->bytes) { | 688 | ctl->op->recalc_thresholds(ctl); |
| 376 | kunmap(page); | 689 | spin_unlock(&ctl->tree_lock); |
| 690 | if (ret) { | ||
| 691 | printk(KERN_ERR "Duplicate entries in " | ||
| 692 | "free space cache, dumping\n"); | ||
| 377 | kmem_cache_free(btrfs_free_space_cachep, e); | 693 | kmem_cache_free(btrfs_free_space_cachep, e); |
| 378 | unlock_page(page); | ||
| 379 | page_cache_release(page); | ||
| 380 | goto free_cache; | 694 | goto free_cache; |
| 381 | } | 695 | } |
| 382 | 696 | list_add_tail(&e->list, &bitmaps); | |
| 383 | if (entry->type == BTRFS_FREE_SPACE_EXTENT) { | ||
| 384 | spin_lock(&ctl->tree_lock); | ||
| 385 | ret = link_free_space(ctl, e); | ||
| 386 | spin_unlock(&ctl->tree_lock); | ||
| 387 | if (ret) { | ||
| 388 | printk(KERN_ERR "Duplicate entries in " | ||
| 389 | "free space cache, dumping\n"); | ||
| 390 | kunmap(page); | ||
| 391 | unlock_page(page); | ||
| 392 | page_cache_release(page); | ||
| 393 | goto free_cache; | ||
| 394 | } | ||
| 395 | } else { | ||
| 396 | e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); | ||
| 397 | if (!e->bitmap) { | ||
| 398 | kunmap(page); | ||
| 399 | kmem_cache_free( | ||
| 400 | btrfs_free_space_cachep, e); | ||
| 401 | unlock_page(page); | ||
| 402 | page_cache_release(page); | ||
| 403 | goto free_cache; | ||
| 404 | } | ||
| 405 | spin_lock(&ctl->tree_lock); | ||
| 406 | ret = link_free_space(ctl, e); | ||
| 407 | ctl->total_bitmaps++; | ||
| 408 | ctl->op->recalc_thresholds(ctl); | ||
| 409 | spin_unlock(&ctl->tree_lock); | ||
| 410 | if (ret) { | ||
| 411 | printk(KERN_ERR "Duplicate entries in " | ||
| 412 | "free space cache, dumping\n"); | ||
| 413 | kunmap(page); | ||
| 414 | unlock_page(page); | ||
| 415 | page_cache_release(page); | ||
| 416 | goto free_cache; | ||
| 417 | } | ||
| 418 | list_add_tail(&e->list, &bitmaps); | ||
| 419 | } | ||
| 420 | |||
| 421 | num_entries--; | ||
| 422 | offset += sizeof(struct btrfs_free_space_entry); | ||
| 423 | if (offset + sizeof(struct btrfs_free_space_entry) >= | ||
| 424 | PAGE_CACHE_SIZE) | ||
| 425 | break; | ||
| 426 | entry++; | ||
| 427 | } | 697 | } |
| 428 | 698 | ||
| 429 | /* | 699 | num_entries--; |
| 430 | * We read an entry out of this page, we need to move on to the | 700 | } |
| 431 | * next page. | ||
| 432 | */ | ||
| 433 | if (need_loop) { | ||
| 434 | kunmap(page); | ||
| 435 | goto next; | ||
| 436 | } | ||
| 437 | 701 | ||
| 438 | /* | 702 | /* |
| 439 | * We add the bitmaps at the end of the entries in order that | 703 | * We add the bitmaps at the end of the entries in order that |
| 440 | * the bitmap entries are added to the cache. | 704 | * the bitmap entries are added to the cache. |
| 441 | */ | 705 | */ |
| 442 | e = list_entry(bitmaps.next, struct btrfs_free_space, list); | 706 | list_for_each_entry_safe(e, n, &bitmaps, list) { |
| 443 | list_del_init(&e->list); | 707 | list_del_init(&e->list); |
| 444 | memcpy(e->bitmap, addr, PAGE_CACHE_SIZE); | 708 | ret = io_ctl_read_bitmap(&io_ctl, e); |
| 445 | kunmap(page); | 709 | if (ret) |
| 446 | num_bitmaps--; | 710 | goto free_cache; |
| 447 | next: | ||
| 448 | unlock_page(page); | ||
| 449 | page_cache_release(page); | ||
| 450 | index++; | ||
| 451 | } | 711 | } |
| 452 | 712 | ||
| 713 | io_ctl_drop_pages(&io_ctl); | ||
| 453 | ret = 1; | 714 | ret = 1; |
| 454 | out: | 715 | out: |
| 716 | io_ctl_free(&io_ctl); | ||
| 455 | return ret; | 717 | return ret; |
| 456 | free_cache: | 718 | free_cache: |
| 719 | io_ctl_drop_pages(&io_ctl); | ||
| 457 | __btrfs_remove_free_space_cache(ctl); | 720 | __btrfs_remove_free_space_cache(ctl); |
| 458 | goto out; | 721 | goto out; |
| 459 | } | 722 | } |
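The rewritten load path reads the cache through the new io_ctl helpers. Pieced together from the hunk, the on-disk layout appears to be: page 0 starts with one u32 crc per page (when crcs are enabled), then the u64 generation, then packed free-space entries; each bitmap takes a full page of its own, crc'd per page. A rough reader skeleton — the entry struct is simplified, and little-endian conversion plus crc verification are elided:

#include <stdint.h>
#include <string.h>

#define PAGE_SZ 4096

struct fs_entry {               /* models btrfs_free_space_entry */
	uint64_t offset;
	uint64_t bytes;
	uint8_t  type;          /* extent or bitmap */
} __attribute__((packed));

/* Walk page 0 of a cache image: skip the crc array, check the
 * generation, then consume packed entries until the page runs out. */
static int parse_first_page(const uint8_t *page, int num_pages,
			    uint64_t expect_gen)
{
	size_t off = sizeof(uint32_t) * (size_t)num_pages; /* crc area */
	uint64_t gen;

	memcpy(&gen, page + off, sizeof(gen));
	if (gen != expect_gen)
		return -1;      /* stale cache: discard and rebuild */
	off += sizeof(gen);

	while (off + sizeof(struct fs_entry) <= PAGE_SZ) {
		struct fs_entry e;

		memcpy(&e, page + off, sizeof(e));
		if (!e.bytes)
			break;  /* zero padding: no more entries */
		off += sizeof(e);
	}
	return 0;
}

int main(void)
{
	uint8_t page[PAGE_SZ] = {0};
	uint64_t gen = 0;

	/* one page, so the crc area is a single u32 */
	memcpy(page + sizeof(uint32_t), &gen, sizeof(gen));
	return parse_first_page(page, 1, 0);
}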
| @@ -465,7 +728,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, | |||
| 465 | struct btrfs_root *root = fs_info->tree_root; | 728 | struct btrfs_root *root = fs_info->tree_root; |
| 466 | struct inode *inode; | 729 | struct inode *inode; |
| 467 | struct btrfs_path *path; | 730 | struct btrfs_path *path; |
| 468 | int ret; | 731 | int ret = 0; |
| 469 | bool matched; | 732 | bool matched; |
| 470 | u64 used = btrfs_block_group_used(&block_group->item); | 733 | u64 used = btrfs_block_group_used(&block_group->item); |
| 471 | 734 | ||
| @@ -497,6 +760,14 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, | |||
| 497 | return 0; | 760 | return 0; |
| 498 | } | 761 | } |
| 499 | 762 | ||
| 763 | /* We may have converted the inode and made the cache invalid. */ | ||
| 764 | spin_lock(&block_group->lock); | ||
| 765 | if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { | ||
| 766 | spin_unlock(&block_group->lock); | ||
| 767 | goto out; | ||
| 768 | } | ||
| 769 | spin_unlock(&block_group->lock); | ||
| 770 | |||
| 500 | ret = __load_free_space_cache(fs_info->tree_root, inode, ctl, | 771 | ret = __load_free_space_cache(fs_info->tree_root, inode, ctl, |
| 501 | path, block_group->key.objectid); | 772 | path, block_group->key.objectid); |
| 502 | btrfs_free_path(path); | 773 | btrfs_free_path(path); |
| @@ -530,6 +801,19 @@ out: | |||
| 530 | return ret; | 801 | return ret; |
| 531 | } | 802 | } |
| 532 | 803 | ||
| 804 | /** | ||
| 805 | * __btrfs_write_out_cache - write out cached info to an inode | ||
| 806 | * @root - the root the inode belongs to | ||
| 807 | * @ctl - the free space cache we are going to write out | ||
| 808 | * @block_group - the block_group for this cache if it belongs to a block_group | ||
| 809 | * @trans - the trans handle | ||
| 810 | * @path - the path to use | ||
| 811 | * @offset - the offset for the key we'll insert | ||
| 812 | * | ||
| 813 | * This function writes out a free space cache struct to disk for quick recovery | ||
| 814 | * on mount. This will return 0 if it was successful in writing the cache out, | ||
| 815 | * and -1 if it was not. | ||
| 816 | */ | ||
| 533 | int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | 817 | int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, |
| 534 | struct btrfs_free_space_ctl *ctl, | 818 | struct btrfs_free_space_ctl *ctl, |
| 535 | struct btrfs_block_group_cache *block_group, | 819 | struct btrfs_block_group_cache *block_group, |
| @@ -540,42 +824,24 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
| 540 | struct extent_buffer *leaf; | 824 | struct extent_buffer *leaf; |
| 541 | struct rb_node *node; | 825 | struct rb_node *node; |
| 542 | struct list_head *pos, *n; | 826 | struct list_head *pos, *n; |
| 543 | struct page **pages; | ||
| 544 | struct page *page; | ||
| 545 | struct extent_state *cached_state = NULL; | 827 | struct extent_state *cached_state = NULL; |
| 546 | struct btrfs_free_cluster *cluster = NULL; | 828 | struct btrfs_free_cluster *cluster = NULL; |
| 547 | struct extent_io_tree *unpin = NULL; | 829 | struct extent_io_tree *unpin = NULL; |
| 830 | struct io_ctl io_ctl; | ||
| 548 | struct list_head bitmap_list; | 831 | struct list_head bitmap_list; |
| 549 | struct btrfs_key key; | 832 | struct btrfs_key key; |
| 550 | u64 start, end, len; | 833 | u64 start, end, len; |
| 551 | u64 bytes = 0; | ||
| 552 | u32 crc = ~(u32)0; | ||
| 553 | int index = 0, num_pages = 0; | ||
| 554 | int entries = 0; | 834 | int entries = 0; |
| 555 | int bitmaps = 0; | 835 | int bitmaps = 0; |
| 556 | int ret = -1; | 836 | int ret; |
| 557 | bool next_page = false; | 837 | int err = -1; |
| 558 | bool out_of_space = false; | ||
| 559 | 838 | ||
| 560 | INIT_LIST_HEAD(&bitmap_list); | 839 | INIT_LIST_HEAD(&bitmap_list); |
| 561 | 840 | ||
| 562 | node = rb_first(&ctl->free_space_offset); | ||
| 563 | if (!node) | ||
| 564 | return 0; | ||
| 565 | |||
| 566 | if (!i_size_read(inode)) | 841 | if (!i_size_read(inode)) |
| 567 | return -1; | 842 | return -1; |
| 568 | 843 | ||
| 569 | num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> | 844 | io_ctl_init(&io_ctl, inode, root); |
| 570 | PAGE_CACHE_SHIFT; | ||
| 571 | |||
| 572 | filemap_write_and_wait(inode->i_mapping); | ||
| 573 | btrfs_wait_ordered_range(inode, inode->i_size & | ||
| 574 | ~(root->sectorsize - 1), (u64)-1); | ||
| 575 | |||
| 576 | pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); | ||
| 577 | if (!pages) | ||
| 578 | return -1; | ||
| 579 | 845 | ||
| 580 | /* Get the cluster for this block_group if it exists */ | 846 | /* Get the cluster for this block_group if it exists */ |
| 581 | if (block_group && !list_empty(&block_group->cluster_list)) | 847 | if (block_group && !list_empty(&block_group->cluster_list)) |
| @@ -589,30 +855,9 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
| 589 | */ | 855 | */ |
| 590 | unpin = root->fs_info->pinned_extents; | 856 | unpin = root->fs_info->pinned_extents; |
| 591 | 857 | ||
| 592 | /* | 858 | /* Lock all pages first so we can lock the extent safely. */ |
| 593 | * Lock all pages first so we can lock the extent safely. | 859 | io_ctl_prepare_pages(&io_ctl, inode, 0); |
| 594 | * | ||
| 595 | * NOTE: Because we hold the ref the entire time we're going to write to | ||
| 596 | * the page find_get_page should never fail, so we don't do a check | ||
| 597 | * after find_get_page at this point. Just putting this here so people | ||
| 598 | * know and don't freak out. | ||
| 599 | */ | ||
| 600 | while (index < num_pages) { | ||
| 601 | page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); | ||
| 602 | if (!page) { | ||
| 603 | int i; | ||
| 604 | |||
| 605 | for (i = 0; i < num_pages; i++) { | ||
| 606 | unlock_page(pages[i]); | ||
| 607 | page_cache_release(pages[i]); | ||
| 608 | } | ||
| 609 | goto out; | ||
| 610 | } | ||
| 611 | pages[index] = page; | ||
| 612 | index++; | ||
| 613 | } | ||
| 614 | 860 | ||
| 615 | index = 0; | ||
| 616 | lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, | 861 | lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, |
| 617 | 0, &cached_state, GFP_NOFS); | 862 | 0, &cached_state, GFP_NOFS); |
| 618 | 863 | ||
| @@ -623,189 +868,111 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
| 623 | if (block_group) | 868 | if (block_group) |
| 624 | start = block_group->key.objectid; | 869 | start = block_group->key.objectid; |
| 625 | 870 | ||
| 626 | /* Write out the extent entries */ | 871 | node = rb_first(&ctl->free_space_offset); |
| 627 | do { | 872 | if (!node && cluster) { |
| 628 | struct btrfs_free_space_entry *entry; | 873 | node = rb_first(&cluster->root); |
| 629 | void *addr, *orig; | 874 | cluster = NULL; |
| 630 | unsigned long offset = 0; | 875 | } |
| 631 | 876 | ||
| 632 | next_page = false; | 877 | /* Make sure we can fit our crcs into the first page */ |
| 878 | if (io_ctl.check_crcs && | ||
| 879 | (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) { | ||
| 880 | WARN_ON(1); | ||
| 881 | goto out_nospc; | ||
| 882 | } | ||
| 633 | 883 | ||
| 634 | if (index >= num_pages) { | 884 | io_ctl_set_generation(&io_ctl, trans->transid); |
| 635 | out_of_space = true; | ||
| 636 | break; | ||
| 637 | } | ||
| 638 | 885 | ||
| 639 | page = pages[index]; | 886 | /* Write out the extent entries */ |
| 887 | while (node) { | ||
| 888 | struct btrfs_free_space *e; | ||
| 640 | 889 | ||
| 641 | orig = addr = kmap(page); | 890 | e = rb_entry(node, struct btrfs_free_space, offset_index); |
| 642 | if (index == 0) { | 891 | entries++; |
| 643 | u64 *gen; | ||
| 644 | 892 | ||
| 645 | /* | 893 | ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes, |
| 646 | * We're going to put in a bogus crc for this page to | 894 | e->bitmap); |
| 647 | * make sure that old kernels who aren't aware of this | 895 | if (ret) |
| 648 | * format will be sure to discard the cache. | 896 | goto out_nospc; |
| 649 | */ | ||
| 650 | addr += sizeof(u64); | ||
| 651 | offset += sizeof(u64); | ||
| 652 | 897 | ||
| 653 | gen = addr; | 898 | if (e->bitmap) { |
| 654 | *gen = trans->transid; | 899 | list_add_tail(&e->list, &bitmap_list); |
| 655 | addr += sizeof(u64); | 900 | bitmaps++; |
| 656 | offset += sizeof(u64); | ||
| 657 | } | 901 | } |
| 658 | entry = addr; | 902 | node = rb_next(node); |
| 659 | 903 | if (!node && cluster) { | |
| 660 | memset(addr, 0, PAGE_CACHE_SIZE - offset); | 904 | node = rb_first(&cluster->root); |
| 661 | while (node && !next_page) { | 905 | cluster = NULL; |
| 662 | struct btrfs_free_space *e; | ||
| 663 | |||
| 664 | e = rb_entry(node, struct btrfs_free_space, offset_index); | ||
| 665 | entries++; | ||
| 666 | |||
| 667 | entry->offset = cpu_to_le64(e->offset); | ||
| 668 | entry->bytes = cpu_to_le64(e->bytes); | ||
| 669 | if (e->bitmap) { | ||
| 670 | entry->type = BTRFS_FREE_SPACE_BITMAP; | ||
| 671 | list_add_tail(&e->list, &bitmap_list); | ||
| 672 | bitmaps++; | ||
| 673 | } else { | ||
| 674 | entry->type = BTRFS_FREE_SPACE_EXTENT; | ||
| 675 | } | ||
| 676 | node = rb_next(node); | ||
| 677 | if (!node && cluster) { | ||
| 678 | node = rb_first(&cluster->root); | ||
| 679 | cluster = NULL; | ||
| 680 | } | ||
| 681 | offset += sizeof(struct btrfs_free_space_entry); | ||
| 682 | if (offset + sizeof(struct btrfs_free_space_entry) >= | ||
| 683 | PAGE_CACHE_SIZE) | ||
| 684 | next_page = true; | ||
| 685 | entry++; | ||
| 686 | } | 906 | } |
| 907 | } | ||
| 687 | 908 | ||
| 688 | /* | 909 | /* |
| 689 | * We want to add any pinned extents to our free space cache | 910 | * We want to add any pinned extents to our free space cache |
| 690 | * so we don't leak the space | 911 | * so we don't leak the space |
| 691 | */ | 912 | */ |
| 692 | while (block_group && !next_page && | 913 | while (block_group && (start < block_group->key.objectid + |
| 693 | (start < block_group->key.objectid + | 914 | block_group->key.offset)) { |
| 694 | block_group->key.offset)) { | 915 | ret = find_first_extent_bit(unpin, start, &start, &end, |
| 695 | ret = find_first_extent_bit(unpin, start, &start, &end, | 916 | EXTENT_DIRTY); |
| 696 | EXTENT_DIRTY); | 917 | if (ret) { |
| 697 | if (ret) { | 918 | ret = 0; |
| 698 | ret = 0; | 919 | break; |
| 699 | break; | ||
| 700 | } | ||
| 701 | |||
| 702 | /* This pinned extent is out of our range */ | ||
| 703 | if (start >= block_group->key.objectid + | ||
| 704 | block_group->key.offset) | ||
| 705 | break; | ||
| 706 | |||
| 707 | len = block_group->key.objectid + | ||
| 708 | block_group->key.offset - start; | ||
| 709 | len = min(len, end + 1 - start); | ||
| 710 | |||
| 711 | entries++; | ||
| 712 | entry->offset = cpu_to_le64(start); | ||
| 713 | entry->bytes = cpu_to_le64(len); | ||
| 714 | entry->type = BTRFS_FREE_SPACE_EXTENT; | ||
| 715 | |||
| 716 | start = end + 1; | ||
| 717 | offset += sizeof(struct btrfs_free_space_entry); | ||
| 718 | if (offset + sizeof(struct btrfs_free_space_entry) >= | ||
| 719 | PAGE_CACHE_SIZE) | ||
| 720 | next_page = true; | ||
| 721 | entry++; | ||
| 722 | } | 920 | } |
| 723 | 921 | ||
| 724 | /* Generate bogus crc value */ | 922 | /* This pinned extent is out of our range */ |
| 725 | if (index == 0) { | 923 | if (start >= block_group->key.objectid + |
| 726 | u32 *tmp; | 924 | block_group->key.offset) |
| 727 | crc = btrfs_csum_data(root, orig + sizeof(u64), crc, | 925 | break; |
| 728 | PAGE_CACHE_SIZE - sizeof(u64)); | ||
| 729 | btrfs_csum_final(crc, (char *)&crc); | ||
| 730 | crc++; | ||
| 731 | tmp = orig; | ||
| 732 | *tmp = crc; | ||
| 733 | } | ||
| 734 | 926 | ||
| 735 | kunmap(page); | 927 | len = block_group->key.objectid + |
| 928 | block_group->key.offset - start; | ||
| 929 | len = min(len, end + 1 - start); | ||
| 736 | 930 | ||
| 737 | bytes += PAGE_CACHE_SIZE; | 931 | entries++; |
| 932 | ret = io_ctl_add_entry(&io_ctl, start, len, NULL); | ||
| 933 | if (ret) | ||
| 934 | goto out_nospc; | ||
| 738 | 935 | ||
| 739 | index++; | 936 | start = end + 1; |
| 740 | } while (node || next_page); | 937 | } |
| 741 | 938 | ||
| 742 | /* Write out the bitmaps */ | 939 | /* Write out the bitmaps */ |
| 743 | list_for_each_safe(pos, n, &bitmap_list) { | 940 | list_for_each_safe(pos, n, &bitmap_list) { |
| 744 | void *addr; | ||
| 745 | struct btrfs_free_space *entry = | 941 | struct btrfs_free_space *entry = |
| 746 | list_entry(pos, struct btrfs_free_space, list); | 942 | list_entry(pos, struct btrfs_free_space, list); |
| 747 | 943 | ||
| 748 | if (index >= num_pages) { | 944 | ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap); |
| 749 | out_of_space = true; | 945 | if (ret) |
| 750 | break; | 946 | goto out_nospc; |
| 751 | } | ||
| 752 | page = pages[index]; | ||
| 753 | |||
| 754 | addr = kmap(page); | ||
| 755 | memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); | ||
| 756 | kunmap(page); | ||
| 757 | bytes += PAGE_CACHE_SIZE; | ||
| 758 | |||
| 759 | list_del_init(&entry->list); | 947 | list_del_init(&entry->list); |
| 760 | index++; | ||
| 761 | } | ||
| 762 | |||
| 763 | if (out_of_space) { | ||
| 764 | btrfs_drop_pages(pages, num_pages); | ||
| 765 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, | ||
| 766 | i_size_read(inode) - 1, &cached_state, | ||
| 767 | GFP_NOFS); | ||
| 768 | ret = 0; | ||
| 769 | goto out; | ||
| 770 | } | 948 | } |
| 771 | 949 | ||
| 772 | /* Zero out the rest of the pages just to make sure */ | 950 | /* Zero out the rest of the pages just to make sure */ |
| 773 | while (index < num_pages) { | 951 | io_ctl_zero_remaining_pages(&io_ctl); |
| 774 | void *addr; | ||
| 775 | |||
| 776 | page = pages[index]; | ||
| 777 | addr = kmap(page); | ||
| 778 | memset(addr, 0, PAGE_CACHE_SIZE); | ||
| 779 | kunmap(page); | ||
| 780 | bytes += PAGE_CACHE_SIZE; | ||
| 781 | index++; | ||
| 782 | } | ||
| 783 | 952 | ||
| 784 | ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0, | 953 | ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages, |
| 785 | bytes, &cached_state); | 954 | 0, i_size_read(inode), &cached_state); |
| 786 | btrfs_drop_pages(pages, num_pages); | 955 | io_ctl_drop_pages(&io_ctl); |
| 787 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, | 956 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, |
| 788 | i_size_read(inode) - 1, &cached_state, GFP_NOFS); | 957 | i_size_read(inode) - 1, &cached_state, GFP_NOFS); |
| 789 | 958 | ||
| 790 | if (ret) { | 959 | if (ret) |
| 791 | ret = 0; | ||
| 792 | goto out; | 960 | goto out; |
| 793 | } | ||
| 794 | 961 | ||
| 795 | BTRFS_I(inode)->generation = trans->transid; | ||
| 796 | 962 | ||
| 797 | filemap_write_and_wait(inode->i_mapping); | 963 | ret = filemap_write_and_wait(inode->i_mapping); |
| 964 | if (ret) | ||
| 965 | goto out; | ||
| 798 | 966 | ||
| 799 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | 967 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; |
| 800 | key.offset = offset; | 968 | key.offset = offset; |
| 801 | key.type = 0; | 969 | key.type = 0; |
| 802 | 970 | ||
| 803 | ret = btrfs_search_slot(trans, root, &key, path, 1, 1); | 971 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); |
| 804 | if (ret < 0) { | 972 | if (ret < 0) { |
| 805 | ret = -1; | 973 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, |
| 806 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, | 974 | EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, |
| 807 | EXTENT_DIRTY | EXTENT_DELALLOC | | 975 | GFP_NOFS); |
| 808 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); | ||
| 809 | goto out; | 976 | goto out; |
| 810 | } | 977 | } |
| 811 | leaf = path->nodes[0]; | 978 | leaf = path->nodes[0]; |
| @@ -816,15 +983,16 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
| 816 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 983 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
| 817 | if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || | 984 | if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || |
| 818 | found_key.offset != offset) { | 985 | found_key.offset != offset) { |
| 819 | ret = -1; | 986 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, |
| 820 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, | 987 | inode->i_size - 1, |
| 821 | EXTENT_DIRTY | EXTENT_DELALLOC | | 988 | EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, |
| 822 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, | 989 | NULL, GFP_NOFS); |
| 823 | GFP_NOFS); | ||
| 824 | btrfs_release_path(path); | 990 | btrfs_release_path(path); |
| 825 | goto out; | 991 | goto out; |
| 826 | } | 992 | } |
| 827 | } | 993 | } |
| 994 | |||
| 995 | BTRFS_I(inode)->generation = trans->transid; | ||
| 828 | header = btrfs_item_ptr(leaf, path->slots[0], | 996 | header = btrfs_item_ptr(leaf, path->slots[0], |
| 829 | struct btrfs_free_space_header); | 997 | struct btrfs_free_space_header); |
| 830 | btrfs_set_free_space_entries(leaf, header, entries); | 998 | btrfs_set_free_space_entries(leaf, header, entries); |
| @@ -833,16 +1001,26 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
| 833 | btrfs_mark_buffer_dirty(leaf); | 1001 | btrfs_mark_buffer_dirty(leaf); |
| 834 | btrfs_release_path(path); | 1002 | btrfs_release_path(path); |
| 835 | 1003 | ||
| 836 | ret = 1; | 1004 | err = 0; |
| 837 | |||
| 838 | out: | 1005 | out: |
| 839 | kfree(pages); | 1006 | io_ctl_free(&io_ctl); |
| 840 | if (ret != 1) { | 1007 | if (err) { |
| 841 | invalidate_inode_pages2_range(inode->i_mapping, 0, index); | 1008 | invalidate_inode_pages2(inode->i_mapping); |
| 842 | BTRFS_I(inode)->generation = 0; | 1009 | BTRFS_I(inode)->generation = 0; |
| 843 | } | 1010 | } |
| 844 | btrfs_update_inode(trans, root, inode); | 1011 | btrfs_update_inode(trans, root, inode); |
| 845 | return ret; | 1012 | return err; |
| 1013 | |||
| 1014 | out_nospc: | ||
| 1015 | list_for_each_safe(pos, n, &bitmap_list) { | ||
| 1016 | struct btrfs_free_space *entry = | ||
| 1017 | list_entry(pos, struct btrfs_free_space, list); | ||
| 1018 | list_del_init(&entry->list); | ||
| 1019 | } | ||
| 1020 | io_ctl_drop_pages(&io_ctl); | ||
| 1021 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, | ||
| 1022 | i_size_read(inode) - 1, &cached_state, GFP_NOFS); | ||
| 1023 | goto out; | ||
| 846 | } | 1024 | } |
| 847 | 1025 | ||
| 848 | int btrfs_write_out_cache(struct btrfs_root *root, | 1026 | int btrfs_write_out_cache(struct btrfs_root *root, |
| @@ -869,14 +1047,15 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
| 869 | 1047 | ||
| 870 | ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans, | 1048 | ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans, |
| 871 | path, block_group->key.objectid); | 1049 | path, block_group->key.objectid); |
| 872 | if (ret < 0) { | 1050 | if (ret) { |
| 873 | spin_lock(&block_group->lock); | 1051 | spin_lock(&block_group->lock); |
| 874 | block_group->disk_cache_state = BTRFS_DC_ERROR; | 1052 | block_group->disk_cache_state = BTRFS_DC_ERROR; |
| 875 | spin_unlock(&block_group->lock); | 1053 | spin_unlock(&block_group->lock); |
| 876 | ret = 0; | 1054 | ret = 0; |
| 877 | 1055 | #ifdef DEBUG | |
| 878 | printk(KERN_ERR "btrfs: failed to write free space cache " | 1056 | printk(KERN_ERR "btrfs: failed to write free space cache " |
| 879 | "for block group %llu\n", block_group->key.objectid); | 1057 | "for block group %llu\n", block_group->key.objectid); |
| 1058 | #endif | ||
| 880 | } | 1059 | } |
| 881 | 1060 | ||
| 882 | iput(inode); | 1061 | iput(inode); |
| @@ -1701,6 +1880,7 @@ again: | |||
| 1701 | ctl->total_bitmaps--; | 1880 | ctl->total_bitmaps--; |
| 1702 | } | 1881 | } |
| 1703 | kmem_cache_free(btrfs_free_space_cachep, info); | 1882 | kmem_cache_free(btrfs_free_space_cachep, info); |
| 1883 | ret = 0; | ||
| 1704 | goto out_lock; | 1884 | goto out_lock; |
| 1705 | } | 1885 | } |
| 1706 | 1886 | ||
| @@ -1708,7 +1888,8 @@ again: | |||
| 1708 | unlink_free_space(ctl, info); | 1888 | unlink_free_space(ctl, info); |
| 1709 | info->offset += bytes; | 1889 | info->offset += bytes; |
| 1710 | info->bytes -= bytes; | 1890 | info->bytes -= bytes; |
| 1711 | link_free_space(ctl, info); | 1891 | ret = link_free_space(ctl, info); |
| 1892 | WARN_ON(ret); | ||
| 1712 | goto out_lock; | 1893 | goto out_lock; |
| 1713 | } | 1894 | } |
| 1714 | 1895 | ||
| @@ -2472,9 +2653,19 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | |||
| 2472 | spin_unlock(&ctl->tree_lock); | 2653 | spin_unlock(&ctl->tree_lock); |
| 2473 | 2654 | ||
| 2474 | if (bytes >= minlen) { | 2655 | if (bytes >= minlen) { |
| 2475 | int update_ret; | 2656 | struct btrfs_space_info *space_info; |
| 2476 | update_ret = btrfs_update_reserved_bytes(block_group, | 2657 | int update = 0; |
| 2477 | bytes, 1, 1); | 2658 | |
| 2659 | space_info = block_group->space_info; | ||
| 2660 | spin_lock(&space_info->lock); | ||
| 2661 | spin_lock(&block_group->lock); | ||
| 2662 | if (!block_group->ro) { | ||
| 2663 | block_group->reserved += bytes; | ||
| 2664 | space_info->bytes_reserved += bytes; | ||
| 2665 | update = 1; | ||
| 2666 | } | ||
| 2667 | spin_unlock(&block_group->lock); | ||
| 2668 | spin_unlock(&space_info->lock); | ||
| 2478 | 2669 | ||
| 2479 | ret = btrfs_error_discard_extent(fs_info->extent_root, | 2670 | ret = btrfs_error_discard_extent(fs_info->extent_root, |
| 2480 | start, | 2671 | start, |
| @@ -2482,9 +2673,16 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | |||
| 2482 | &actually_trimmed); | 2673 | &actually_trimmed); |
| 2483 | 2674 | ||
| 2484 | btrfs_add_free_space(block_group, start, bytes); | 2675 | btrfs_add_free_space(block_group, start, bytes); |
| 2485 | if (!update_ret) | 2676 | if (update) { |
| 2486 | btrfs_update_reserved_bytes(block_group, | 2677 | spin_lock(&space_info->lock); |
| 2487 | bytes, 0, 1); | 2678 | spin_lock(&block_group->lock); |
| 2679 | if (block_group->ro) | ||
| 2680 | space_info->bytes_readonly += bytes; | ||
| 2681 | block_group->reserved -= bytes; | ||
| 2682 | space_info->bytes_reserved -= bytes; | ||
| 2683 | spin_unlock(&space_info->lock); | ||
| 2684 | spin_unlock(&block_group->lock); | ||
| 2685 | } | ||
| 2488 | 2686 | ||
| 2489 | if (ret) | 2687 | if (ret) |
| 2490 | break; | 2688 | break; |
| @@ -2643,9 +2841,13 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root, | |||
| 2643 | return 0; | 2841 | return 0; |
| 2644 | 2842 | ||
| 2645 | ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0); | 2843 | ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0); |
| 2646 | if (ret < 0) | 2844 | if (ret) { |
| 2845 | btrfs_delalloc_release_metadata(inode, inode->i_size); | ||
| 2846 | #ifdef DEBUG | ||
| 2647 | printk(KERN_ERR "btrfs: failed to write free ino cache " | 2847 | printk(KERN_ERR "btrfs: failed to write free ino cache " |
| 2648 | "for root %llu\n", root->root_key.objectid); | 2848 | "for root %llu\n", root->root_key.objectid); |
| 2849 | #endif | ||
| 2850 | } | ||
| 2649 | 2851 | ||
| 2650 | iput(inode); | 2852 | iput(inode); |
| 2651 | return ret; | 2853 | return ret; |
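
Note on the btrfs_trim_block_group() hunk above: it open-codes what btrfs_update_reserved_bytes() used to do. The space_info lock is taken before the block-group lock, the bytes are accounted only while the group is still writable, and the accounting is reversed the same way once the discard completes. The sketch below shows just that locking pattern; the struct layout and pthread mutexes are simplified stand-ins for the kernel's spinlocks and real btrfs types, not quotations of them.

#include <pthread.h>

struct space_info {
	pthread_mutex_t lock;			/* stand-in for spinlock_t */
	unsigned long long bytes_reserved;
	unsigned long long bytes_readonly;
};

struct block_group {
	pthread_mutex_t lock;
	int ro;					/* group went read-only? */
	unsigned long long reserved;
	struct space_info *space_info;
};

/* Returns 1 when the bytes were accounted and must be given back later. */
static int trim_reserve(struct block_group *bg, unsigned long long bytes)
{
	int update = 0;

	pthread_mutex_lock(&bg->space_info->lock);	/* outer lock first */
	pthread_mutex_lock(&bg->lock);
	if (!bg->ro) {				/* skip groups that went read-only */
		bg->reserved += bytes;
		bg->space_info->bytes_reserved += bytes;
		update = 1;
	}
	pthread_mutex_unlock(&bg->lock);
	pthread_mutex_unlock(&bg->space_info->lock);
	return update;
}

static void trim_unreserve(struct block_group *bg, unsigned long long bytes)
{
	pthread_mutex_lock(&bg->space_info->lock);
	pthread_mutex_lock(&bg->lock);
	if (bg->ro)				/* group flipped read-only meanwhile */
		bg->space_info->bytes_readonly += bytes;
	bg->reserved -= bytes;
	bg->space_info->bytes_reserved -= bytes;
	pthread_mutex_unlock(&bg->lock);
	pthread_mutex_unlock(&bg->space_info->lock);
}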
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index b4087e0fa871..53dcbdf446cd 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
| @@ -465,14 +465,16 @@ again: | |||
| 465 | /* Just to make sure we have enough space */ | 465 | /* Just to make sure we have enough space */ |
| 466 | prealloc += 8 * PAGE_CACHE_SIZE; | 466 | prealloc += 8 * PAGE_CACHE_SIZE; |
| 467 | 467 | ||
| 468 | ret = btrfs_check_data_free_space(inode, prealloc); | 468 | ret = btrfs_delalloc_reserve_space(inode, prealloc); |
| 469 | if (ret) | 469 | if (ret) |
| 470 | goto out_put; | 470 | goto out_put; |
| 471 | 471 | ||
| 472 | ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, | 472 | ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, |
| 473 | prealloc, prealloc, &alloc_hint); | 473 | prealloc, prealloc, &alloc_hint); |
| 474 | if (ret) | 474 | if (ret) { |
| 475 | btrfs_delalloc_release_space(inode, prealloc); | ||
| 475 | goto out_put; | 476 | goto out_put; |
| 477 | } | ||
| 476 | btrfs_free_reserved_data_space(inode, prealloc); | 478 | btrfs_free_reserved_data_space(inode, prealloc); |
| 477 | 479 | ||
| 478 | out_put: | 480 | out_put: |
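
The inode-map change above is a pairing fix: btrfs_delalloc_reserve_space() is now matched by btrfs_delalloc_release_space() on the preallocation failure path, so a failed btrfs_prealloc_file_range_trans() no longer leaks the reservation. Reduced to a standalone sketch, with hypothetical stand-in helpers named after the btrfs calls they mirror:

#include <errno.h>

/* Hypothetical stand-ins for the reserve/consume pair in the hunk above. */
static long long reserved_bytes;
static int do_reserve(long long n)	/* btrfs_delalloc_reserve_space() */
{ reserved_bytes += n; return 0; }
static void do_release(long long n)	/* btrfs_delalloc_release_space() */
{ reserved_bytes -= n; }
static int do_prealloc(long long n)	/* btrfs_prealloc_file_range_trans() */
{ (void)n; return -ENOSPC; }		/* force the error path for the demo */

int prealloc_with_reservation(long long bytes)
{
	int ret;

	ret = do_reserve(bytes);
	if (ret)
		return ret;		/* nothing reserved, nothing to undo */

	ret = do_prealloc(bytes);
	if (ret) {
		do_release(bytes);	/* the fix: failed consumer, undo in full */
		return ret;
	}
	/* success: the real code releases only the data half here,
	 * via btrfs_free_reserved_data_space() */
	return 0;
}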
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 75686a61bd45..966ddcc4c63d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -45,10 +45,10 @@ | |||
| 45 | #include "btrfs_inode.h" | 45 | #include "btrfs_inode.h" |
| 46 | #include "ioctl.h" | 46 | #include "ioctl.h" |
| 47 | #include "print-tree.h" | 47 | #include "print-tree.h" |
| 48 | #include "volumes.h" | ||
| 49 | #include "ordered-data.h" | 48 | #include "ordered-data.h" |
| 50 | #include "xattr.h" | 49 | #include "xattr.h" |
| 51 | #include "tree-log.h" | 50 | #include "tree-log.h" |
| 51 | #include "volumes.h" | ||
| 52 | #include "compression.h" | 52 | #include "compression.h" |
| 53 | #include "locking.h" | 53 | #include "locking.h" |
| 54 | #include "free-space-cache.h" | 54 | #include "free-space-cache.h" |
| @@ -393,7 +393,10 @@ again: | |||
| 393 | (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) { | 393 | (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) { |
| 394 | WARN_ON(pages); | 394 | WARN_ON(pages); |
| 395 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); | 395 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); |
| 396 | BUG_ON(!pages); | 396 | if (!pages) { |
| 397 | /* just bail out to the uncompressed code */ | ||
| 398 | goto cont; | ||
| 399 | } | ||
| 397 | 400 | ||
| 398 | if (BTRFS_I(inode)->force_compress) | 401 | if (BTRFS_I(inode)->force_compress) |
| 399 | compress_type = BTRFS_I(inode)->force_compress; | 402 | compress_type = BTRFS_I(inode)->force_compress; |
| @@ -424,6 +427,7 @@ again: | |||
| 424 | will_compress = 1; | 427 | will_compress = 1; |
| 425 | } | 428 | } |
| 426 | } | 429 | } |
| 430 | cont: | ||
| 427 | if (start == 0) { | 431 | if (start == 0) { |
| 428 | trans = btrfs_join_transaction(root); | 432 | trans = btrfs_join_transaction(root); |
| 429 | BUG_ON(IS_ERR(trans)); | 433 | BUG_ON(IS_ERR(trans)); |
| @@ -820,7 +824,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 820 | } | 824 | } |
| 821 | 825 | ||
| 822 | BUG_ON(disk_num_bytes > | 826 | BUG_ON(disk_num_bytes > |
| 823 | btrfs_super_total_bytes(&root->fs_info->super_copy)); | 827 | btrfs_super_total_bytes(root->fs_info->super_copy)); |
| 824 | 828 | ||
| 825 | alloc_hint = get_extent_allocation_hint(inode, start, num_bytes); | 829 | alloc_hint = get_extent_allocation_hint(inode, start, num_bytes); |
| 826 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); | 830 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); |
| @@ -1792,12 +1796,12 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
| 1792 | } | 1796 | } |
| 1793 | ret = 0; | 1797 | ret = 0; |
| 1794 | out: | 1798 | out: |
| 1795 | if (nolock) { | 1799 | if (root != root->fs_info->tree_root) |
| 1796 | if (trans) | ||
| 1797 | btrfs_end_transaction_nolock(trans, root); | ||
| 1798 | } else { | ||
| 1799 | btrfs_delalloc_release_metadata(inode, ordered_extent->len); | 1800 | btrfs_delalloc_release_metadata(inode, ordered_extent->len); |
| 1800 | if (trans) | 1801 | if (trans) { |
| 1802 | if (nolock) | ||
| 1803 | btrfs_end_transaction_nolock(trans, root); | ||
| 1804 | else | ||
| 1801 | btrfs_end_transaction(trans, root); | 1805 | btrfs_end_transaction(trans, root); |
| 1802 | } | 1806 | } |
| 1803 | 1807 | ||
| @@ -1819,153 +1823,9 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | |||
| 1819 | } | 1823 | } |
| 1820 | 1824 | ||
| 1821 | /* | 1825 | /* |
| 1822 | * When IO fails, either with EIO or csum verification fails, we | ||
| 1823 | * try other mirrors that might have a good copy of the data. This | ||
| 1824 | * io_failure_record is used to record state as we go through all the | ||
| 1825 | * mirrors. If another mirror has good data, the page is set up to date | ||
| 1826 | * and things continue. If a good mirror can't be found, the original | ||
| 1827 | * bio end_io callback is called to indicate things have failed. | ||
| 1828 | */ | ||
| 1829 | struct io_failure_record { | ||
| 1830 | struct page *page; | ||
| 1831 | u64 start; | ||
| 1832 | u64 len; | ||
| 1833 | u64 logical; | ||
| 1834 | unsigned long bio_flags; | ||
| 1835 | int last_mirror; | ||
| 1836 | }; | ||
| 1837 | |||
| 1838 | static int btrfs_io_failed_hook(struct bio *failed_bio, | ||
| 1839 | struct page *page, u64 start, u64 end, | ||
| 1840 | struct extent_state *state) | ||
| 1841 | { | ||
| 1842 | struct io_failure_record *failrec = NULL; | ||
| 1843 | u64 private; | ||
| 1844 | struct extent_map *em; | ||
| 1845 | struct inode *inode = page->mapping->host; | ||
| 1846 | struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; | ||
| 1847 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 1848 | struct bio *bio; | ||
| 1849 | int num_copies; | ||
| 1850 | int ret; | ||
| 1851 | int rw; | ||
| 1852 | u64 logical; | ||
| 1853 | |||
| 1854 | ret = get_state_private(failure_tree, start, &private); | ||
| 1855 | if (ret) { | ||
| 1856 | failrec = kmalloc(sizeof(*failrec), GFP_NOFS); | ||
| 1857 | if (!failrec) | ||
| 1858 | return -ENOMEM; | ||
| 1859 | failrec->start = start; | ||
| 1860 | failrec->len = end - start + 1; | ||
| 1861 | failrec->last_mirror = 0; | ||
| 1862 | failrec->bio_flags = 0; | ||
| 1863 | |||
| 1864 | read_lock(&em_tree->lock); | ||
| 1865 | em = lookup_extent_mapping(em_tree, start, failrec->len); | ||
| 1866 | if (em->start > start || em->start + em->len < start) { | ||
| 1867 | free_extent_map(em); | ||
| 1868 | em = NULL; | ||
| 1869 | } | ||
| 1870 | read_unlock(&em_tree->lock); | ||
| 1871 | |||
| 1872 | if (IS_ERR_OR_NULL(em)) { | ||
| 1873 | kfree(failrec); | ||
| 1874 | return -EIO; | ||
| 1875 | } | ||
| 1876 | logical = start - em->start; | ||
| 1877 | logical = em->block_start + logical; | ||
| 1878 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { | ||
| 1879 | logical = em->block_start; | ||
| 1880 | failrec->bio_flags = EXTENT_BIO_COMPRESSED; | ||
| 1881 | extent_set_compress_type(&failrec->bio_flags, | ||
| 1882 | em->compress_type); | ||
| 1883 | } | ||
| 1884 | failrec->logical = logical; | ||
| 1885 | free_extent_map(em); | ||
| 1886 | set_extent_bits(failure_tree, start, end, EXTENT_LOCKED | | ||
| 1887 | EXTENT_DIRTY, GFP_NOFS); | ||
| 1888 | set_state_private(failure_tree, start, | ||
| 1889 | (u64)(unsigned long)failrec); | ||
| 1890 | } else { | ||
| 1891 | failrec = (struct io_failure_record *)(unsigned long)private; | ||
| 1892 | } | ||
| 1893 | num_copies = btrfs_num_copies( | ||
| 1894 | &BTRFS_I(inode)->root->fs_info->mapping_tree, | ||
| 1895 | failrec->logical, failrec->len); | ||
| 1896 | failrec->last_mirror++; | ||
| 1897 | if (!state) { | ||
| 1898 | spin_lock(&BTRFS_I(inode)->io_tree.lock); | ||
| 1899 | state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree, | ||
| 1900 | failrec->start, | ||
| 1901 | EXTENT_LOCKED); | ||
| 1902 | if (state && state->start != failrec->start) | ||
| 1903 | state = NULL; | ||
| 1904 | spin_unlock(&BTRFS_I(inode)->io_tree.lock); | ||
| 1905 | } | ||
| 1906 | if (!state || failrec->last_mirror > num_copies) { | ||
| 1907 | set_state_private(failure_tree, failrec->start, 0); | ||
| 1908 | clear_extent_bits(failure_tree, failrec->start, | ||
| 1909 | failrec->start + failrec->len - 1, | ||
| 1910 | EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS); | ||
| 1911 | kfree(failrec); | ||
| 1912 | return -EIO; | ||
| 1913 | } | ||
| 1914 | bio = bio_alloc(GFP_NOFS, 1); | ||
| 1915 | bio->bi_private = state; | ||
| 1916 | bio->bi_end_io = failed_bio->bi_end_io; | ||
| 1917 | bio->bi_sector = failrec->logical >> 9; | ||
| 1918 | bio->bi_bdev = failed_bio->bi_bdev; | ||
| 1919 | bio->bi_size = 0; | ||
| 1920 | |||
| 1921 | bio_add_page(bio, page, failrec->len, start - page_offset(page)); | ||
| 1922 | if (failed_bio->bi_rw & REQ_WRITE) | ||
| 1923 | rw = WRITE; | ||
| 1924 | else | ||
| 1925 | rw = READ; | ||
| 1926 | |||
| 1927 | ret = BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, | ||
| 1928 | failrec->last_mirror, | ||
| 1929 | failrec->bio_flags, 0); | ||
| 1930 | return ret; | ||
| 1931 | } | ||
| 1932 | |||
| 1933 | /* | ||
| 1934 | * each time an IO finishes, we do a fast check in the IO failure tree | ||
| 1935 | * to see if we need to process or clean up an io_failure_record | ||
| 1936 | */ | ||
| 1937 | static int btrfs_clean_io_failures(struct inode *inode, u64 start) | ||
| 1938 | { | ||
| 1939 | u64 private; | ||
| 1940 | u64 private_failure; | ||
| 1941 | struct io_failure_record *failure; | ||
| 1942 | int ret; | ||
| 1943 | |||
| 1944 | private = 0; | ||
| 1945 | if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, | ||
| 1946 | (u64)-1, 1, EXTENT_DIRTY, 0)) { | ||
| 1947 | ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, | ||
| 1948 | start, &private_failure); | ||
| 1949 | if (ret == 0) { | ||
| 1950 | failure = (struct io_failure_record *)(unsigned long) | ||
| 1951 | private_failure; | ||
| 1952 | set_state_private(&BTRFS_I(inode)->io_failure_tree, | ||
| 1953 | failure->start, 0); | ||
| 1954 | clear_extent_bits(&BTRFS_I(inode)->io_failure_tree, | ||
| 1955 | failure->start, | ||
| 1956 | failure->start + failure->len - 1, | ||
| 1957 | EXTENT_DIRTY | EXTENT_LOCKED, | ||
| 1958 | GFP_NOFS); | ||
| 1959 | kfree(failure); | ||
| 1960 | } | ||
| 1961 | } | ||
| 1962 | return 0; | ||
| 1963 | } | ||
| 1964 | |||
| 1965 | /* | ||
| 1966 | * when reads are done, we need to check csums to verify the data is correct | 1826 | * when reads are done, we need to check csums to verify the data is correct |
| 1967 | * if there's a match, we allow the bio to finish. If not, we go through | 1827 | * if there's a match, we allow the bio to finish. If not, the code in |
| 1968 | * the io_failure_record routines to find good copies | 1828 | * extent_io.c will try to find good copies for us. |
| 1969 | */ | 1829 | */ |
| 1970 | static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, | 1830 | static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, |
| 1971 | struct extent_state *state) | 1831 | struct extent_state *state) |
| @@ -2011,10 +1871,6 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
| 2011 | 1871 | ||
| 2012 | kunmap_atomic(kaddr, KM_USER0); | 1872 | kunmap_atomic(kaddr, KM_USER0); |
| 2013 | good: | 1873 | good: |
| 2014 | /* if the io failure tree for this inode is non-empty, | ||
| 2015 | * check to see if we've recovered from a failed IO | ||
| 2016 | */ | ||
| 2017 | btrfs_clean_io_failures(inode, start); | ||
| 2018 | return 0; | 1874 | return 0; |
| 2019 | 1875 | ||
| 2020 | zeroit: | 1876 | zeroit: |
| @@ -2079,89 +1935,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) | |||
| 2079 | up_read(&root->fs_info->cleanup_work_sem); | 1935 | up_read(&root->fs_info->cleanup_work_sem); |
| 2080 | } | 1936 | } |
| 2081 | 1937 | ||
| 2082 | /* | ||
| 2083 | * calculate extra metadata reservation when snapshotting a subvolume | ||
| 2084 | * contains orphan files. | ||
| 2085 | */ | ||
| 2086 | void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, | ||
| 2087 | struct btrfs_pending_snapshot *pending, | ||
| 2088 | u64 *bytes_to_reserve) | ||
| 2089 | { | ||
| 2090 | struct btrfs_root *root; | ||
| 2091 | struct btrfs_block_rsv *block_rsv; | ||
| 2092 | u64 num_bytes; | ||
| 2093 | int index; | ||
| 2094 | |||
| 2095 | root = pending->root; | ||
| 2096 | if (!root->orphan_block_rsv || list_empty(&root->orphan_list)) | ||
| 2097 | return; | ||
| 2098 | |||
| 2099 | block_rsv = root->orphan_block_rsv; | ||
| 2100 | |||
| 2101 | /* orphan block reservation for the snapshot */ | ||
| 2102 | num_bytes = block_rsv->size; | ||
| 2103 | |||
| 2104 | /* | ||
| 2105 | * after the snapshot is created, COWing tree blocks may use more | ||
| 2106 | * space than it frees. So we should make sure there is enough | ||
| 2107 | * reserved space. | ||
| 2108 | */ | ||
| 2109 | index = trans->transid & 0x1; | ||
| 2110 | if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) { | ||
| 2111 | num_bytes += block_rsv->size - | ||
| 2112 | (block_rsv->reserved + block_rsv->freed[index]); | ||
| 2113 | } | ||
| 2114 | |||
| 2115 | *bytes_to_reserve += num_bytes; | ||
| 2116 | } | ||
| 2117 | |||
| 2118 | void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, | ||
| 2119 | struct btrfs_pending_snapshot *pending) | ||
| 2120 | { | ||
| 2121 | struct btrfs_root *root = pending->root; | ||
| 2122 | struct btrfs_root *snap = pending->snap; | ||
| 2123 | struct btrfs_block_rsv *block_rsv; | ||
| 2124 | u64 num_bytes; | ||
| 2125 | int index; | ||
| 2126 | int ret; | ||
| 2127 | |||
| 2128 | if (!root->orphan_block_rsv || list_empty(&root->orphan_list)) | ||
| 2129 | return; | ||
| 2130 | |||
| 2131 | /* refill source subvolume's orphan block reservation */ | ||
| 2132 | block_rsv = root->orphan_block_rsv; | ||
| 2133 | index = trans->transid & 0x1; | ||
| 2134 | if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) { | ||
| 2135 | num_bytes = block_rsv->size - | ||
| 2136 | (block_rsv->reserved + block_rsv->freed[index]); | ||
| 2137 | ret = btrfs_block_rsv_migrate(&pending->block_rsv, | ||
| 2138 | root->orphan_block_rsv, | ||
| 2139 | num_bytes); | ||
| 2140 | BUG_ON(ret); | ||
| 2141 | } | ||
| 2142 | |||
| 2143 | /* setup orphan block reservation for the snapshot */ | ||
| 2144 | block_rsv = btrfs_alloc_block_rsv(snap); | ||
| 2145 | BUG_ON(!block_rsv); | ||
| 2146 | |||
| 2147 | btrfs_add_durable_block_rsv(root->fs_info, block_rsv); | ||
| 2148 | snap->orphan_block_rsv = block_rsv; | ||
| 2149 | |||
| 2150 | num_bytes = root->orphan_block_rsv->size; | ||
| 2151 | ret = btrfs_block_rsv_migrate(&pending->block_rsv, | ||
| 2152 | block_rsv, num_bytes); | ||
| 2153 | BUG_ON(ret); | ||
| 2154 | |||
| 2155 | #if 0 | ||
| 2156 | /* insert orphan item for the snapshot */ | ||
| 2157 | WARN_ON(!root->orphan_item_inserted); | ||
| 2158 | ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root, | ||
| 2159 | snap->root_key.objectid); | ||
| 2160 | BUG_ON(ret); | ||
| 2161 | snap->orphan_item_inserted = 1; | ||
| 2162 | #endif | ||
| 2163 | } | ||
| 2164 | |||
| 2165 | enum btrfs_orphan_cleanup_state { | 1938 | enum btrfs_orphan_cleanup_state { |
| 2166 | ORPHAN_CLEANUP_STARTED = 1, | 1939 | ORPHAN_CLEANUP_STARTED = 1, |
| 2167 | ORPHAN_CLEANUP_DONE = 2, | 1940 | ORPHAN_CLEANUP_DONE = 2, |
| @@ -2247,9 +2020,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
| 2247 | } | 2020 | } |
| 2248 | spin_unlock(&root->orphan_lock); | 2021 | spin_unlock(&root->orphan_lock); |
| 2249 | 2022 | ||
| 2250 | if (block_rsv) | ||
| 2251 | btrfs_add_durable_block_rsv(root->fs_info, block_rsv); | ||
| 2252 | |||
| 2253 | /* grab metadata reservation from transaction handle */ | 2023 | /* grab metadata reservation from transaction handle */ |
| 2254 | if (reserve) { | 2024 | if (reserve) { |
| 2255 | ret = btrfs_orphan_reserve_metadata(trans, inode); | 2025 | ret = btrfs_orphan_reserve_metadata(trans, inode); |
| @@ -2316,6 +2086,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
| 2316 | struct btrfs_key key, found_key; | 2086 | struct btrfs_key key, found_key; |
| 2317 | struct btrfs_trans_handle *trans; | 2087 | struct btrfs_trans_handle *trans; |
| 2318 | struct inode *inode; | 2088 | struct inode *inode; |
| 2089 | u64 last_objectid = 0; | ||
| 2319 | int ret = 0, nr_unlink = 0, nr_truncate = 0; | 2090 | int ret = 0, nr_unlink = 0, nr_truncate = 0; |
| 2320 | 2091 | ||
| 2321 | if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) | 2092 | if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) |
| @@ -2367,41 +2138,49 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
| 2367 | * crossing root thing. we store the inode number in the | 2138 | * crossing root thing. we store the inode number in the |
| 2368 | * offset of the orphan item. | 2139 | * offset of the orphan item. |
| 2369 | */ | 2140 | */ |
| 2141 | |||
| 2142 | if (found_key.offset == last_objectid) { | ||
| 2143 | printk(KERN_ERR "btrfs: Error removing orphan entry, " | ||
| 2144 | "stopping orphan cleanup\n"); | ||
| 2145 | ret = -EINVAL; | ||
| 2146 | goto out; | ||
| 2147 | } | ||
| 2148 | |||
| 2149 | last_objectid = found_key.offset; | ||
| 2150 | |||
| 2370 | found_key.objectid = found_key.offset; | 2151 | found_key.objectid = found_key.offset; |
| 2371 | found_key.type = BTRFS_INODE_ITEM_KEY; | 2152 | found_key.type = BTRFS_INODE_ITEM_KEY; |
| 2372 | found_key.offset = 0; | 2153 | found_key.offset = 0; |
| 2373 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); | 2154 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); |
| 2374 | if (IS_ERR(inode)) { | 2155 | ret = PTR_RET(inode); |
| 2375 | ret = PTR_ERR(inode); | 2156 | if (ret && ret != -ESTALE) |
| 2376 | goto out; | 2157 | goto out; |
| 2377 | } | ||
| 2378 | 2158 | ||
| 2379 | /* | 2159 | /* |
| 2380 | * add this inode to the orphan list so btrfs_orphan_del does | 2160 | * Inode is already gone but the orphan item is still there, |
| 2381 | * the proper thing when we hit it | 2161 | * kill the orphan item. |
| 2382 | */ | 2162 | */ |
| 2383 | spin_lock(&root->orphan_lock); | 2163 | if (ret == -ESTALE) { |
| 2384 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | 2164 | trans = btrfs_start_transaction(root, 1); |
| 2385 | spin_unlock(&root->orphan_lock); | ||
| 2386 | |||
| 2387 | /* | ||
| 2388 | * if this is a bad inode, means we actually succeeded in | ||
| 2389 | * removing the inode, but not the orphan record, which means | ||
| 2390 | * we need to manually delete the orphan since iput will just | ||
| 2391 | * do a destroy_inode | ||
| 2392 | */ | ||
| 2393 | if (is_bad_inode(inode)) { | ||
| 2394 | trans = btrfs_start_transaction(root, 0); | ||
| 2395 | if (IS_ERR(trans)) { | 2165 | if (IS_ERR(trans)) { |
| 2396 | ret = PTR_ERR(trans); | 2166 | ret = PTR_ERR(trans); |
| 2397 | goto out; | 2167 | goto out; |
| 2398 | } | 2168 | } |
| 2399 | btrfs_orphan_del(trans, inode); | 2169 | ret = btrfs_del_orphan_item(trans, root, |
| 2170 | found_key.objectid); | ||
| 2171 | BUG_ON(ret); | ||
| 2400 | btrfs_end_transaction(trans, root); | 2172 | btrfs_end_transaction(trans, root); |
| 2401 | iput(inode); | ||
| 2402 | continue; | 2173 | continue; |
| 2403 | } | 2174 | } |
| 2404 | 2175 | ||
| 2176 | /* | ||
| 2177 | * add this inode to the orphan list so btrfs_orphan_del does | ||
| 2178 | * the proper thing when we hit it | ||
| 2179 | */ | ||
| 2180 | spin_lock(&root->orphan_lock); | ||
| 2181 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | ||
| 2182 | spin_unlock(&root->orphan_lock); | ||
| 2183 | |||
| 2405 | /* if we have links, this was a truncate, let's do that */ | 2184 | /* if we have links, this was a truncate, let's do that */ |
| 2406 | if (inode->i_nlink) { | 2185 | if (inode->i_nlink) { |
| 2407 | if (!S_ISREG(inode->i_mode)) { | 2186 | if (!S_ISREG(inode->i_mode)) { |
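
The orphan-cleanup hunk above adds two defenses. First, last_objectid detects that the same orphan item came back on the next pass (meaning its deletion failed) and aborts with -EINVAL instead of spinning forever. Second, a lookup result of -ESTALE is now taken to mean the inode is already gone while its orphan item survived, in which case only the item is deleted. The control flow reduces to the standalone sketch below; every helper is a stub standing in for the btrfs call named beside it.

#include <errno.h>

static unsigned long long next_orphan_objectid(void)	/* orphan-item tree search */
{ return 0; }
static int lookup_inode(unsigned long long ino)		/* btrfs_iget() */
{ (void)ino; return 0; }
static int delete_orphan_item(unsigned long long ino)	/* btrfs_del_orphan_item() */
{ (void)ino; return 0; }

int orphan_cleanup_sketch(void)
{
	unsigned long long last = 0, ino;
	int ret;

	while ((ino = next_orphan_objectid()) != 0) {
		if (ino == last)	/* same item came back: delete failed */
			return -EINVAL;	/* stop instead of looping forever */
		last = ino;

		ret = lookup_inode(ino);
		if (ret == -ESTALE) {	/* inode gone, orphan item left behind */
			ret = delete_orphan_item(ino);
			if (ret)
				return ret;
			continue;
		}
		if (ret)
			return ret;
		/* inode still exists: truncate it or finish the unlink */
	}
	return 0;
}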
| @@ -2835,7 +2614,16 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | |||
| 2835 | u64 ino = btrfs_ino(inode); | 2614 | u64 ino = btrfs_ino(inode); |
| 2836 | u64 dir_ino = btrfs_ino(dir); | 2615 | u64 dir_ino = btrfs_ino(dir); |
| 2837 | 2616 | ||
| 2838 | trans = btrfs_start_transaction(root, 10); | 2617 | /* |
| 2618 | * 1 for the possible orphan item | ||
| 2619 | * 1 for the dir item | ||
| 2620 | * 1 for the dir index | ||
| 2621 | * 1 for the inode ref | ||
| 2622 | * 1 for the inode ref in the tree log | ||
| 2623 | * 2 for the dir entries in the log | ||
| 2624 | * 1 for the inode | ||
| 2625 | */ | ||
| 2626 | trans = btrfs_start_transaction(root, 8); | ||
| 2839 | if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) | 2627 | if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) |
| 2840 | return trans; | 2628 | return trans; |
| 2841 | 2629 | ||
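
The new comment above spells out why eight items cover the worst case of an unlink. Each reserved item is sized for a full copy-on-write of a tree path, triple-counted for safety; the helper doing that math in this era looks roughly like the sketch below (an assumption to verify against ctree.h in the matching tree, not a quotation of it).

#define BTRFS_MAX_LEVEL 8

/* One CoW'd path (leaf plus internal nodes), counted three times, per item. */
static unsigned long long
calc_trans_metadata_size(unsigned long long leafsize,
			 unsigned long long nodesize, unsigned num_items)
{
	return (leafsize + nodesize * (BTRFS_MAX_LEVEL - 1)) * 3 * num_items;
}
/* e.g. 4K leaves and nodes, 8 items: (4096 + 4096 * 7) * 3 * 8 = 786432 bytes */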
| @@ -2858,7 +2646,8 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | |||
| 2858 | return ERR_PTR(-ENOMEM); | 2646 | return ERR_PTR(-ENOMEM); |
| 2859 | } | 2647 | } |
| 2860 | 2648 | ||
| 2861 | trans = btrfs_start_transaction(root, 0); | 2649 | /* 1 for the orphan item */ |
| 2650 | trans = btrfs_start_transaction(root, 1); | ||
| 2862 | if (IS_ERR(trans)) { | 2651 | if (IS_ERR(trans)) { |
| 2863 | btrfs_free_path(path); | 2652 | btrfs_free_path(path); |
| 2864 | root->fs_info->enospc_unlink = 0; | 2653 | root->fs_info->enospc_unlink = 0; |
| @@ -2963,6 +2752,12 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | |||
| 2963 | err = 0; | 2752 | err = 0; |
| 2964 | out: | 2753 | out: |
| 2965 | btrfs_free_path(path); | 2754 | btrfs_free_path(path); |
| 2755 | /* Migrate the orphan reservation over */ | ||
| 2756 | if (!err) | ||
| 2757 | err = btrfs_block_rsv_migrate(trans->block_rsv, | ||
| 2758 | &root->fs_info->global_block_rsv, | ||
| 2759 | trans->bytes_reserved); | ||
| 2760 | |||
| 2966 | if (err) { | 2761 | if (err) { |
| 2967 | btrfs_end_transaction(trans, root); | 2762 | btrfs_end_transaction(trans, root); |
| 2968 | root->fs_info->enospc_unlink = 0; | 2763 | root->fs_info->enospc_unlink = 0; |
| @@ -2977,6 +2772,9 @@ static void __unlink_end_trans(struct btrfs_trans_handle *trans, | |||
| 2977 | struct btrfs_root *root) | 2772 | struct btrfs_root *root) |
| 2978 | { | 2773 | { |
| 2979 | if (trans->block_rsv == &root->fs_info->global_block_rsv) { | 2774 | if (trans->block_rsv == &root->fs_info->global_block_rsv) { |
| 2775 | btrfs_block_rsv_release(root, trans->block_rsv, | ||
| 2776 | trans->bytes_reserved); | ||
| 2777 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
| 2980 | BUG_ON(!root->fs_info->enospc_unlink); | 2778 | BUG_ON(!root->fs_info->enospc_unlink); |
| 2981 | root->fs_info->enospc_unlink = 0; | 2779 | root->fs_info->enospc_unlink = 0; |
| 2982 | } | 2780 | } |
| @@ -3368,6 +3166,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
| 3368 | pgoff_t index = from >> PAGE_CACHE_SHIFT; | 3166 | pgoff_t index = from >> PAGE_CACHE_SHIFT; |
| 3369 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | 3167 | unsigned offset = from & (PAGE_CACHE_SIZE-1); |
| 3370 | struct page *page; | 3168 | struct page *page; |
| 3169 | gfp_t mask = btrfs_alloc_write_mask(mapping); | ||
| 3371 | int ret = 0; | 3170 | int ret = 0; |
| 3372 | u64 page_start; | 3171 | u64 page_start; |
| 3373 | u64 page_end; | 3172 | u64 page_end; |
| @@ -3380,7 +3179,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
| 3380 | 3179 | ||
| 3381 | ret = -ENOMEM; | 3180 | ret = -ENOMEM; |
| 3382 | again: | 3181 | again: |
| 3383 | page = find_or_create_page(mapping, index, GFP_NOFS); | 3182 | page = find_or_create_page(mapping, index, mask); |
| 3384 | if (!page) { | 3183 | if (!page) { |
| 3385 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | 3184 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
| 3386 | goto out; | 3185 | goto out; |
| @@ -3613,6 +3412,8 @@ void btrfs_evict_inode(struct inode *inode) | |||
| 3613 | { | 3412 | { |
| 3614 | struct btrfs_trans_handle *trans; | 3413 | struct btrfs_trans_handle *trans; |
| 3615 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3414 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 3415 | struct btrfs_block_rsv *rsv, *global_rsv; | ||
| 3416 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); | ||
| 3616 | unsigned long nr; | 3417 | unsigned long nr; |
| 3617 | int ret; | 3418 | int ret; |
| 3618 | 3419 | ||
| @@ -3640,22 +3441,55 @@ void btrfs_evict_inode(struct inode *inode) | |||
| 3640 | goto no_delete; | 3441 | goto no_delete; |
| 3641 | } | 3442 | } |
| 3642 | 3443 | ||
| 3444 | rsv = btrfs_alloc_block_rsv(root); | ||
| 3445 | if (!rsv) { | ||
| 3446 | btrfs_orphan_del(NULL, inode); | ||
| 3447 | goto no_delete; | ||
| 3448 | } | ||
| 3449 | rsv->size = min_size; | ||
| 3450 | global_rsv = &root->fs_info->global_block_rsv; | ||
| 3451 | |||
| 3643 | btrfs_i_size_write(inode, 0); | 3452 | btrfs_i_size_write(inode, 0); |
| 3644 | 3453 | ||
| 3454 | /* | ||
| 3455 | * This is a bit simpler than btrfs_truncate since | ||
| 3456 | * | ||
| 3457 | * 1) We've already reserved our space for our orphan item in the | ||
| 3458 | * unlink. | ||
| 3459 | * 2) We're going to delete the inode item, so we don't need to update | ||
| 3460 | * it at all. | ||
| 3461 | * | ||
| 3462 | * So we just need to reserve some slack space in case we add bytes when | ||
| 3463 | * doing the truncate. | ||
| 3464 | */ | ||
| 3645 | while (1) { | 3465 | while (1) { |
| 3646 | trans = btrfs_join_transaction(root); | 3466 | ret = btrfs_block_rsv_refill(root, rsv, min_size); |
| 3647 | BUG_ON(IS_ERR(trans)); | 3467 | |
| 3648 | trans->block_rsv = root->orphan_block_rsv; | 3468 | /* |
| 3469 | * Try and steal from the global reserve since we will | ||
| 3470 | * likely not use this space anyway, we want to try as | ||
| 3471 | * hard as possible to get this to work. | ||
| 3472 | */ | ||
| 3473 | if (ret) | ||
| 3474 | ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size); | ||
| 3649 | 3475 | ||
| 3650 | ret = btrfs_block_rsv_check(trans, root, | ||
| 3651 | root->orphan_block_rsv, 0, 5); | ||
| 3652 | if (ret) { | 3476 | if (ret) { |
| 3653 | BUG_ON(ret != -EAGAIN); | 3477 | printk(KERN_WARNING "Could not get space for a " |
| 3654 | ret = btrfs_commit_transaction(trans, root); | 3478 | "delete, will truncate on mount %d\n", ret); |
| 3655 | BUG_ON(ret); | 3479 | btrfs_orphan_del(NULL, inode); |
| 3656 | continue; | 3480 | btrfs_free_block_rsv(root, rsv); |
| 3481 | goto no_delete; | ||
| 3657 | } | 3482 | } |
| 3658 | 3483 | ||
| 3484 | trans = btrfs_start_transaction(root, 0); | ||
| 3485 | if (IS_ERR(trans)) { | ||
| 3486 | btrfs_orphan_del(NULL, inode); | ||
| 3487 | btrfs_free_block_rsv(root, rsv); | ||
| 3488 | goto no_delete; | ||
| 3489 | } | ||
| 3490 | |||
| 3491 | trans->block_rsv = rsv; | ||
| 3492 | |||
| 3659 | ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); | 3493 | ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); |
| 3660 | if (ret != -EAGAIN) | 3494 | if (ret != -EAGAIN) |
| 3661 | break; | 3495 | break; |
| @@ -3664,14 +3498,17 @@ void btrfs_evict_inode(struct inode *inode) | |||
| 3664 | btrfs_end_transaction(trans, root); | 3498 | btrfs_end_transaction(trans, root); |
| 3665 | trans = NULL; | 3499 | trans = NULL; |
| 3666 | btrfs_btree_balance_dirty(root, nr); | 3500 | btrfs_btree_balance_dirty(root, nr); |
| 3667 | |||
| 3668 | } | 3501 | } |
| 3669 | 3502 | ||
| 3503 | btrfs_free_block_rsv(root, rsv); | ||
| 3504 | |||
| 3670 | if (ret == 0) { | 3505 | if (ret == 0) { |
| 3506 | trans->block_rsv = root->orphan_block_rsv; | ||
| 3671 | ret = btrfs_orphan_del(trans, inode); | 3507 | ret = btrfs_orphan_del(trans, inode); |
| 3672 | BUG_ON(ret); | 3508 | BUG_ON(ret); |
| 3673 | } | 3509 | } |
| 3674 | 3510 | ||
| 3511 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
| 3675 | if (!(root == root->fs_info->tree_root || | 3512 | if (!(root == root->fs_info->tree_root || |
| 3676 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)) | 3513 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)) |
| 3677 | btrfs_return_ino(root, btrfs_ino(inode)); | 3514 | btrfs_return_ino(root, btrfs_ino(inode)); |
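
btrfs_evict_inode() now drives its own reservation instead of leaning on orphan_block_rsv: refill a private rsv to min_size on each pass, fall back to migrating bytes out of the global reserve, and bail out to the orphan path when even that fails (the orphan item then forces the truncate on the next mount). The control flow reduces to the compilable sketch below; every helper is a stub standing in for the btrfs call named beside it.

#include <errno.h>

static int refill(long long *rsv, long long min_size)	/* btrfs_block_rsv_refill() */
{ *rsv = min_size; return 0; }
static int migrate(long long *src, long long *dst, long long n)
{ *src -= n; *dst += n; return 0; }			/* btrfs_block_rsv_migrate() */
static int truncate_some_items(void)			/* btrfs_truncate_inode_items() */
{ static int pass; return ++pass < 3 ? -EAGAIN : 0; }

int evict_sketch(void)
{
	long long global_rsv = 1 << 20, rsv = 0, min_size = 4096;
	int ret;

	while (1) {
		ret = refill(&rsv, min_size);
		if (ret)	/* normal refill failed: steal from the global reserve */
			ret = migrate(&global_rsv, &rsv, min_size);
		if (ret)
			return ret;	/* give up: orphan stays, truncate on mount */

		/* one transaction per pass, with trans->block_rsv = rsv */
		ret = truncate_some_items();
		if (ret != -EAGAIN)
			break;		/* 0 = done, anything else = hard error */
		/* -EAGAIN: end the transaction, balance dirty pages, loop */
	}
	return ret;
}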
| @@ -5795,8 +5632,7 @@ again: | |||
| 5795 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { | 5632 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { |
| 5796 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); | 5633 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); |
| 5797 | if (!ret) | 5634 | if (!ret) |
| 5798 | ret = btrfs_update_inode(trans, root, inode); | 5635 | err = btrfs_update_inode(trans, root, inode); |
| 5799 | err = ret; | ||
| 5800 | goto out; | 5636 | goto out; |
| 5801 | } | 5637 | } |
| 5802 | 5638 | ||
| @@ -6289,7 +6125,7 @@ int btrfs_readpage(struct file *file, struct page *page) | |||
| 6289 | { | 6125 | { |
| 6290 | struct extent_io_tree *tree; | 6126 | struct extent_io_tree *tree; |
| 6291 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 6127 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
| 6292 | return extent_read_full_page(tree, page, btrfs_get_extent); | 6128 | return extent_read_full_page(tree, page, btrfs_get_extent, 0); |
| 6293 | } | 6129 | } |
| 6294 | 6130 | ||
| 6295 | static int btrfs_writepage(struct page *page, struct writeback_control *wbc) | 6131 | static int btrfs_writepage(struct page *page, struct writeback_control *wbc) |
| @@ -6541,6 +6377,7 @@ static int btrfs_truncate(struct inode *inode) | |||
| 6541 | struct btrfs_trans_handle *trans; | 6377 | struct btrfs_trans_handle *trans; |
| 6542 | unsigned long nr; | 6378 | unsigned long nr; |
| 6543 | u64 mask = root->sectorsize - 1; | 6379 | u64 mask = root->sectorsize - 1; |
| 6380 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); | ||
| 6544 | 6381 | ||
| 6545 | ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); | 6382 | ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); |
| 6546 | if (ret) | 6383 | if (ret) |
| @@ -6588,19 +6425,23 @@ static int btrfs_truncate(struct inode *inode) | |||
| 6588 | rsv = btrfs_alloc_block_rsv(root); | 6425 | rsv = btrfs_alloc_block_rsv(root); |
| 6589 | if (!rsv) | 6426 | if (!rsv) |
| 6590 | return -ENOMEM; | 6427 | return -ENOMEM; |
| 6591 | btrfs_add_durable_block_rsv(root->fs_info, rsv); | 6428 | rsv->size = min_size; |
| 6592 | 6429 | ||
| 6430 | /* | ||
| 6431 | * 1 for the truncate slack space | ||
| 6432 | * 1 for the orphan item we're going to add | ||
| 6433 | * 1 for the orphan item deletion | ||
| 6434 | * 1 for updating the inode. | ||
| 6435 | */ | ||
| 6593 | trans = btrfs_start_transaction(root, 4); | 6436 | trans = btrfs_start_transaction(root, 4); |
| 6594 | if (IS_ERR(trans)) { | 6437 | if (IS_ERR(trans)) { |
| 6595 | err = PTR_ERR(trans); | 6438 | err = PTR_ERR(trans); |
| 6596 | goto out; | 6439 | goto out; |
| 6597 | } | 6440 | } |
| 6598 | 6441 | ||
| 6599 | /* | 6442 | /* Migrate the slack space for the truncate to our reserve */ |
| 6600 | * Reserve space for the truncate process. Truncate should be adding | 6443 | ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv, |
| 6601 | * space, but if there are snapshots it may end up using space. | 6444 | min_size); |
| 6602 | */ | ||
| 6603 | ret = btrfs_truncate_reserve_metadata(trans, root, rsv); | ||
| 6604 | BUG_ON(ret); | 6445 | BUG_ON(ret); |
| 6605 | 6446 | ||
| 6606 | ret = btrfs_orphan_add(trans, inode); | 6447 | ret = btrfs_orphan_add(trans, inode); |
| @@ -6609,21 +6450,6 @@ static int btrfs_truncate(struct inode *inode) | |||
| 6609 | goto out; | 6450 | goto out; |
| 6610 | } | 6451 | } |
| 6611 | 6452 | ||
| 6612 | nr = trans->blocks_used; | ||
| 6613 | btrfs_end_transaction(trans, root); | ||
| 6614 | btrfs_btree_balance_dirty(root, nr); | ||
| 6615 | |||
| 6616 | /* | ||
| 6617 | * Ok so we've already migrated our bytes over for the truncate, so here | ||
| 6618 | * just reserve the one slot we need for updating the inode. | ||
| 6619 | */ | ||
| 6620 | trans = btrfs_start_transaction(root, 1); | ||
| 6621 | if (IS_ERR(trans)) { | ||
| 6622 | err = PTR_ERR(trans); | ||
| 6623 | goto out; | ||
| 6624 | } | ||
| 6625 | trans->block_rsv = rsv; | ||
| 6626 | |||
| 6627 | /* | 6453 | /* |
| 6628 | * setattr is responsible for setting the ordered_data_close flag, | 6454 | * setattr is responsible for setting the ordered_data_close flag, |
| 6629 | * but that is only tested during the last file release. That | 6455 | * but that is only tested during the last file release. That |
| @@ -6645,20 +6471,30 @@ static int btrfs_truncate(struct inode *inode) | |||
| 6645 | btrfs_add_ordered_operation(trans, root, inode); | 6471 | btrfs_add_ordered_operation(trans, root, inode); |
| 6646 | 6472 | ||
| 6647 | while (1) { | 6473 | while (1) { |
| 6474 | ret = btrfs_block_rsv_refill(root, rsv, min_size); | ||
| 6475 | if (ret) { | ||
| 6476 | /* | ||
| 6477 | * This can only happen with the original transaction we | ||
| 6478 | * started above, every other time we shouldn't have a | ||
| 6479 | * transaction started yet. | ||
| 6480 | */ | ||
| 6481 | if (ret == -EAGAIN) | ||
| 6482 | goto end_trans; | ||
| 6483 | err = ret; | ||
| 6484 | break; | ||
| 6485 | } | ||
| 6486 | |||
| 6648 | if (!trans) { | 6487 | if (!trans) { |
| 6649 | trans = btrfs_start_transaction(root, 3); | 6488 | /* Just need the 1 for updating the inode */ |
| 6489 | trans = btrfs_start_transaction(root, 1); | ||
| 6650 | if (IS_ERR(trans)) { | 6490 | if (IS_ERR(trans)) { |
| 6651 | err = PTR_ERR(trans); | 6491 | err = PTR_ERR(trans); |
| 6652 | goto out; | 6492 | goto out; |
| 6653 | } | 6493 | } |
| 6654 | |||
| 6655 | ret = btrfs_truncate_reserve_metadata(trans, root, | ||
| 6656 | rsv); | ||
| 6657 | BUG_ON(ret); | ||
| 6658 | |||
| 6659 | trans->block_rsv = rsv; | ||
| 6660 | } | 6494 | } |
| 6661 | 6495 | ||
| 6496 | trans->block_rsv = rsv; | ||
| 6497 | |||
| 6662 | ret = btrfs_truncate_inode_items(trans, root, inode, | 6498 | ret = btrfs_truncate_inode_items(trans, root, inode, |
| 6663 | inode->i_size, | 6499 | inode->i_size, |
| 6664 | BTRFS_EXTENT_DATA_KEY); | 6500 | BTRFS_EXTENT_DATA_KEY); |
| @@ -6673,7 +6509,7 @@ static int btrfs_truncate(struct inode *inode) | |||
| 6673 | err = ret; | 6509 | err = ret; |
| 6674 | break; | 6510 | break; |
| 6675 | } | 6511 | } |
| 6676 | 6512 | end_trans: | |
| 6677 | nr = trans->blocks_used; | 6513 | nr = trans->blocks_used; |
| 6678 | btrfs_end_transaction(trans, root); | 6514 | btrfs_end_transaction(trans, root); |
| 6679 | trans = NULL; | 6515 | trans = NULL; |
| @@ -6755,9 +6591,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
| 6755 | ei->last_sub_trans = 0; | 6591 | ei->last_sub_trans = 0; |
| 6756 | ei->logged_trans = 0; | 6592 | ei->logged_trans = 0; |
| 6757 | ei->delalloc_bytes = 0; | 6593 | ei->delalloc_bytes = 0; |
| 6758 | ei->reserved_bytes = 0; | ||
| 6759 | ei->disk_i_size = 0; | 6594 | ei->disk_i_size = 0; |
| 6760 | ei->flags = 0; | 6595 | ei->flags = 0; |
| 6596 | ei->csum_bytes = 0; | ||
| 6761 | ei->index_cnt = (u64)-1; | 6597 | ei->index_cnt = (u64)-1; |
| 6762 | ei->last_unlink_trans = 0; | 6598 | ei->last_unlink_trans = 0; |
| 6763 | 6599 | ||
| @@ -6803,6 +6639,8 @@ void btrfs_destroy_inode(struct inode *inode) | |||
| 6803 | WARN_ON(inode->i_data.nrpages); | 6639 | WARN_ON(inode->i_data.nrpages); |
| 6804 | WARN_ON(BTRFS_I(inode)->outstanding_extents); | 6640 | WARN_ON(BTRFS_I(inode)->outstanding_extents); |
| 6805 | WARN_ON(BTRFS_I(inode)->reserved_extents); | 6641 | WARN_ON(BTRFS_I(inode)->reserved_extents); |
| 6642 | WARN_ON(BTRFS_I(inode)->delalloc_bytes); | ||
| 6643 | WARN_ON(BTRFS_I(inode)->csum_bytes); | ||
| 6806 | 6644 | ||
| 6807 | /* | 6645 | /* |
| 6808 | * This can happen where we create an inode, but somebody else also | 6646 | * This can happen where we create an inode, but somebody else also |
| @@ -7420,7 +7258,6 @@ static struct extent_io_ops btrfs_extent_io_ops = { | |||
| 7420 | .readpage_end_io_hook = btrfs_readpage_end_io_hook, | 7258 | .readpage_end_io_hook = btrfs_readpage_end_io_hook, |
| 7421 | .writepage_end_io_hook = btrfs_writepage_end_io_hook, | 7259 | .writepage_end_io_hook = btrfs_writepage_end_io_hook, |
| 7422 | .writepage_start_hook = btrfs_writepage_start_hook, | 7260 | .writepage_start_hook = btrfs_writepage_start_hook, |
| 7423 | .readpage_io_failed_hook = btrfs_io_failed_hook, | ||
| 7424 | .set_bit_hook = btrfs_set_bit_hook, | 7261 | .set_bit_hook = btrfs_set_bit_hook, |
| 7425 | .clear_bit_hook = btrfs_clear_bit_hook, | 7262 | .clear_bit_hook = btrfs_clear_bit_hook, |
| 7426 | .merge_extent_hook = btrfs_merge_extent_hook, | 7263 | .merge_extent_hook = btrfs_merge_extent_hook, |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index dae5dfe41ba5..4a34c472f126 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
| @@ -51,6 +51,7 @@ | |||
| 51 | #include "volumes.h" | 51 | #include "volumes.h" |
| 52 | #include "locking.h" | 52 | #include "locking.h" |
| 53 | #include "inode-map.h" | 53 | #include "inode-map.h" |
| 54 | #include "backref.h" | ||
| 54 | 55 | ||
| 55 | /* Mask out flags that are inappropriate for the given type of inode. */ | 56 | /* Mask out flags that are inappropriate for the given type of inode. */ |
| 56 | static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) | 57 | static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) |
| @@ -117,7 +118,7 @@ void btrfs_update_iflags(struct inode *inode) | |||
| 117 | /* | 118 | /* |
| 118 | * Inherit flags from the parent inode. | 119 | * Inherit flags from the parent inode. |
| 119 | * | 120 | * |
| 120 | * Unlike extN we don't have any flags we don't want to inherit currently. | 121 | * Currently only the compression flags and the cow flags are inherited. |
| 121 | */ | 122 | */ |
| 122 | void btrfs_inherit_iflags(struct inode *inode, struct inode *dir) | 123 | void btrfs_inherit_iflags(struct inode *inode, struct inode *dir) |
| 123 | { | 124 | { |
| @@ -128,12 +129,17 @@ void btrfs_inherit_iflags(struct inode *inode, struct inode *dir) | |||
| 128 | 129 | ||
| 129 | flags = BTRFS_I(dir)->flags; | 130 | flags = BTRFS_I(dir)->flags; |
| 130 | 131 | ||
| 131 | if (S_ISREG(inode->i_mode)) | 132 | if (flags & BTRFS_INODE_NOCOMPRESS) { |
| 132 | flags &= ~BTRFS_INODE_DIRSYNC; | 133 | BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS; |
| 133 | else if (!S_ISDIR(inode->i_mode)) | 134 | BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; |
| 134 | flags &= (BTRFS_INODE_NODUMP | BTRFS_INODE_NOATIME); | 135 | } else if (flags & BTRFS_INODE_COMPRESS) { |
| 136 | BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS; | ||
| 137 | BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS; | ||
| 138 | } | ||
| 139 | |||
| 140 | if (flags & BTRFS_INODE_NODATACOW) | ||
| 141 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; | ||
| 135 | 142 | ||
| 136 | BTRFS_I(inode)->flags = flags; | ||
| 137 | btrfs_update_iflags(inode); | 143 | btrfs_update_iflags(inode); |
| 138 | } | 144 | } |
| 139 | 145 | ||
| @@ -277,6 +283,7 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) | |||
| 277 | struct fstrim_range range; | 283 | struct fstrim_range range; |
| 278 | u64 minlen = ULLONG_MAX; | 284 | u64 minlen = ULLONG_MAX; |
| 279 | u64 num_devices = 0; | 285 | u64 num_devices = 0; |
| 286 | u64 total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy); | ||
| 280 | int ret; | 287 | int ret; |
| 281 | 288 | ||
| 282 | if (!capable(CAP_SYS_ADMIN)) | 289 | if (!capable(CAP_SYS_ADMIN)) |
| @@ -295,12 +302,15 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) | |||
| 295 | } | 302 | } |
| 296 | } | 303 | } |
| 297 | rcu_read_unlock(); | 304 | rcu_read_unlock(); |
| 305 | |||
| 298 | if (!num_devices) | 306 | if (!num_devices) |
| 299 | return -EOPNOTSUPP; | 307 | return -EOPNOTSUPP; |
| 300 | |||
| 301 | if (copy_from_user(&range, arg, sizeof(range))) | 308 | if (copy_from_user(&range, arg, sizeof(range))) |
| 302 | return -EFAULT; | 309 | return -EFAULT; |
| 310 | if (range.start > total_bytes) | ||
| 311 | return -EINVAL; | ||
| 303 | 312 | ||
| 313 | range.len = min(range.len, total_bytes - range.start); | ||
| 304 | range.minlen = max(range.minlen, minlen); | 314 | range.minlen = max(range.minlen, minlen); |
| 305 | ret = btrfs_trim_fs(root, &range); | 315 | ret = btrfs_trim_fs(root, &range); |
| 306 | if (ret < 0) | 316 | if (ret < 0) |
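
With the clamp added above, a trim range whose start lies beyond the end of the filesystem now fails with EINVAL, and an oversized length is silently shortened to fit. For reference, this is how the ioctl is driven from user space; FITRIM and struct fstrim_range are the standard VFS interface from linux/fs.h, nothing btrfs-specific.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/fs.h>		/* FITRIM, struct fstrim_range */

int main(int argc, char **argv)
{
	struct fstrim_range range;
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY);	/* any file or dir on the filesystem */
	if (fd < 0)
		return 1;

	memset(&range, 0, sizeof(range));
	range.start = 0;
	range.len = (unsigned long long)-1;	/* clamped by the kernel now */
	range.minlen = 0;			/* raised to the device minimum */

	if (ioctl(fd, FITRIM, &range) < 0) {
		perror("FITRIM");	/* EINVAL if start is past the fs end */
		return 1;
	}
	printf("trimmed %llu bytes\n", (unsigned long long)range.len);
	return 0;
}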
| @@ -760,7 +770,7 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, | |||
| 760 | int ret = 1; | 770 | int ret = 1; |
| 761 | 771 | ||
| 762 | /* | 772 | /* |
| 763 | * make sure that once we start defragging and extent, we keep on | 773 | * make sure that once we start defragging an extent, we keep on |
| 764 | * defragging it | 774 | * defragging it |
| 765 | */ | 775 | */ |
| 766 | if (start < *defrag_end) | 776 | if (start < *defrag_end) |
| @@ -805,7 +815,6 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, | |||
| 805 | * extent will force at least part of that big extent to be defragged. | 815 | * extent will force at least part of that big extent to be defragged. |
| 806 | */ | 816 | */ |
| 807 | if (ret) { | 817 | if (ret) { |
| 808 | *last_len += len; | ||
| 809 | *defrag_end = extent_map_end(em); | 818 | *defrag_end = extent_map_end(em); |
| 810 | } else { | 819 | } else { |
| 811 | *last_len = 0; | 820 | *last_len = 0; |
| @@ -843,6 +852,7 @@ static int cluster_pages_for_defrag(struct inode *inode, | |||
| 843 | int i_done; | 852 | int i_done; |
| 844 | struct btrfs_ordered_extent *ordered; | 853 | struct btrfs_ordered_extent *ordered; |
| 845 | struct extent_state *cached_state = NULL; | 854 | struct extent_state *cached_state = NULL; |
| 855 | gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); | ||
| 846 | 856 | ||
| 847 | if (isize == 0) | 857 | if (isize == 0) |
| 848 | return 0; | 858 | return 0; |
| @@ -860,7 +870,7 @@ again: | |||
| 860 | for (i = 0; i < num_pages; i++) { | 870 | for (i = 0; i < num_pages; i++) { |
| 861 | struct page *page; | 871 | struct page *page; |
| 862 | page = find_or_create_page(inode->i_mapping, | 872 | page = find_or_create_page(inode->i_mapping, |
| 863 | start_index + i, GFP_NOFS); | 873 | start_index + i, mask); |
| 864 | if (!page) | 874 | if (!page) |
| 865 | break; | 875 | break; |
| 866 | 876 | ||
| @@ -972,18 +982,20 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
| 972 | struct btrfs_super_block *disk_super; | 982 | struct btrfs_super_block *disk_super; |
| 973 | struct file_ra_state *ra = NULL; | 983 | struct file_ra_state *ra = NULL; |
| 974 | unsigned long last_index; | 984 | unsigned long last_index; |
| 985 | u64 isize = i_size_read(inode); | ||
| 975 | u64 features; | 986 | u64 features; |
| 976 | u64 last_len = 0; | 987 | u64 last_len = 0; |
| 977 | u64 skip = 0; | 988 | u64 skip = 0; |
| 978 | u64 defrag_end = 0; | 989 | u64 defrag_end = 0; |
| 979 | u64 newer_off = range->start; | 990 | u64 newer_off = range->start; |
| 980 | int newer_left = 0; | ||
| 981 | unsigned long i; | 991 | unsigned long i; |
| 992 | unsigned long ra_index = 0; | ||
| 982 | int ret; | 993 | int ret; |
| 983 | int defrag_count = 0; | 994 | int defrag_count = 0; |
| 984 | int compress_type = BTRFS_COMPRESS_ZLIB; | 995 | int compress_type = BTRFS_COMPRESS_ZLIB; |
| 985 | int extent_thresh = range->extent_thresh; | 996 | int extent_thresh = range->extent_thresh; |
| 986 | int newer_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; | 997 | int max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; |
| 998 | int cluster = max_cluster; | ||
| 987 | u64 new_align = ~((u64)128 * 1024 - 1); | 999 | u64 new_align = ~((u64)128 * 1024 - 1); |
| 988 | struct page **pages = NULL; | 1000 | struct page **pages = NULL; |
| 989 | 1001 | ||
| @@ -997,7 +1009,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
| 997 | compress_type = range->compress_type; | 1009 | compress_type = range->compress_type; |
| 998 | } | 1010 | } |
| 999 | 1011 | ||
| 1000 | if (inode->i_size == 0) | 1012 | if (isize == 0) |
| 1001 | return 0; | 1013 | return 0; |
| 1002 | 1014 | ||
| 1003 | /* | 1015 | /* |
| @@ -1013,7 +1025,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
| 1013 | ra = &file->f_ra; | 1025 | ra = &file->f_ra; |
| 1014 | } | 1026 | } |
| 1015 | 1027 | ||
| 1016 | pages = kmalloc(sizeof(struct page *) * newer_cluster, | 1028 | pages = kmalloc(sizeof(struct page *) * max_cluster, |
| 1017 | GFP_NOFS); | 1029 | GFP_NOFS); |
| 1018 | if (!pages) { | 1030 | if (!pages) { |
| 1019 | ret = -ENOMEM; | 1031 | ret = -ENOMEM; |
| @@ -1022,10 +1034,10 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
| 1022 | 1034 | ||
| 1023 | /* find the last page to defrag */ | 1035 | /* find the last page to defrag */ |
| 1024 | if (range->start + range->len > range->start) { | 1036 | if (range->start + range->len > range->start) { |
| 1025 | last_index = min_t(u64, inode->i_size - 1, | 1037 | last_index = min_t(u64, isize - 1, |
| 1026 | range->start + range->len - 1) >> PAGE_CACHE_SHIFT; | 1038 | range->start + range->len - 1) >> PAGE_CACHE_SHIFT; |
| 1027 | } else { | 1039 | } else { |
| 1028 | last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; | 1040 | last_index = (isize - 1) >> PAGE_CACHE_SHIFT; |
| 1029 | } | 1041 | } |
| 1030 | 1042 | ||
| 1031 | if (newer_than) { | 1043 | if (newer_than) { |
| @@ -1038,14 +1050,13 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
| 1038 | * the extents in the file evenly spaced | 1050 | * the extents in the file evenly spaced |
| 1039 | */ | 1051 | */ |
| 1040 | i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; | 1052 | i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; |
| 1041 | newer_left = newer_cluster; | ||
| 1042 | } else | 1053 | } else |
| 1043 | goto out_ra; | 1054 | goto out_ra; |
| 1044 | } else { | 1055 | } else { |
| 1045 | i = range->start >> PAGE_CACHE_SHIFT; | 1056 | i = range->start >> PAGE_CACHE_SHIFT; |
| 1046 | } | 1057 | } |
| 1047 | if (!max_to_defrag) | 1058 | if (!max_to_defrag) |
| 1048 | max_to_defrag = last_index - 1; | 1059 | max_to_defrag = last_index; |
| 1049 | 1060 | ||
| 1050 | /* | 1061 | /* |
| 1051 | * make writeback start from i, so the defrag range can be | 1062 | * make writeback start from i, so the defrag range can be |
| @@ -1079,18 +1090,31 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
| 1079 | i = max(i + 1, next); | 1090 | i = max(i + 1, next); |
| 1080 | continue; | 1091 | continue; |
| 1081 | } | 1092 | } |
| 1093 | |||
| 1094 | if (!newer_than) { | ||
| 1095 | cluster = (PAGE_CACHE_ALIGN(defrag_end) >> | ||
| 1096 | PAGE_CACHE_SHIFT) - i; | ||
| 1097 | cluster = min(cluster, max_cluster); | ||
| 1098 | } else { | ||
| 1099 | cluster = max_cluster; | ||
| 1100 | } | ||
| 1101 | |||
| 1082 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) | 1102 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) |
| 1083 | BTRFS_I(inode)->force_compress = compress_type; | 1103 | BTRFS_I(inode)->force_compress = compress_type; |
| 1084 | 1104 | ||
| 1085 | btrfs_force_ra(inode->i_mapping, ra, file, i, newer_cluster); | 1105 | if (i + cluster > ra_index) { |
| 1106 | ra_index = max(i, ra_index); | ||
| 1107 | btrfs_force_ra(inode->i_mapping, ra, file, ra_index, | ||
| 1108 | cluster); | ||
| 1109 | ra_index += max_cluster; | ||
| 1110 | } | ||
| 1086 | 1111 | ||
| 1087 | ret = cluster_pages_for_defrag(inode, pages, i, newer_cluster); | 1112 | ret = cluster_pages_for_defrag(inode, pages, i, cluster); |
| 1088 | if (ret < 0) | 1113 | if (ret < 0) |
| 1089 | goto out_ra; | 1114 | goto out_ra; |
| 1090 | 1115 | ||
| 1091 | defrag_count += ret; | 1116 | defrag_count += ret; |
| 1092 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret); | 1117 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret); |
| 1093 | i += ret; | ||
| 1094 | 1118 | ||
| 1095 | if (newer_than) { | 1119 | if (newer_than) { |
| 1096 | if (newer_off == (u64)-1) | 1120 | if (newer_off == (u64)-1) |
| @@ -1105,12 +1129,17 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
| 1105 | if (!ret) { | 1129 | if (!ret) { |
| 1106 | range->start = newer_off; | 1130 | range->start = newer_off; |
| 1107 | i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; | 1131 | i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; |
| 1108 | newer_left = newer_cluster; | ||
| 1109 | } else { | 1132 | } else { |
| 1110 | break; | 1133 | break; |
| 1111 | } | 1134 | } |
| 1112 | } else { | 1135 | } else { |
| 1113 | i++; | 1136 | if (ret > 0) { |
| 1137 | i += ret; | ||
| 1138 | last_len += ret << PAGE_CACHE_SHIFT; | ||
| 1139 | } else { | ||
| 1140 | i++; | ||
| 1141 | last_len = 0; | ||
| 1142 | } | ||
| 1114 | } | 1143 | } |
| 1115 | } | 1144 | } |
| 1116 | 1145 | ||
| @@ -1136,16 +1165,14 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
| 1136 | mutex_unlock(&inode->i_mutex); | 1165 | mutex_unlock(&inode->i_mutex); |
| 1137 | } | 1166 | } |
| 1138 | 1167 | ||
| 1139 | disk_super = &root->fs_info->super_copy; | 1168 | disk_super = root->fs_info->super_copy; |
| 1140 | features = btrfs_super_incompat_flags(disk_super); | 1169 | features = btrfs_super_incompat_flags(disk_super); |
| 1141 | if (range->compress_type == BTRFS_COMPRESS_LZO) { | 1170 | if (range->compress_type == BTRFS_COMPRESS_LZO) { |
| 1142 | features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; | 1171 | features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; |
| 1143 | btrfs_set_super_incompat_flags(disk_super, features); | 1172 | btrfs_set_super_incompat_flags(disk_super, features); |
| 1144 | } | 1173 | } |
| 1145 | 1174 | ||
| 1146 | if (!file) | 1175 | ret = defrag_count; |
| 1147 | kfree(ra); | ||
| 1148 | return defrag_count; | ||
| 1149 | 1176 | ||
| 1150 | out_ra: | 1177 | out_ra: |
| 1151 | if (!file) | 1178 | if (!file) |
| @@ -2587,7 +2614,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) | |||
| 2587 | return PTR_ERR(trans); | 2614 | return PTR_ERR(trans); |
| 2588 | } | 2615 | } |
| 2589 | 2616 | ||
| 2590 | dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); | 2617 | dir_id = btrfs_super_root_dir(root->fs_info->super_copy); |
| 2591 | di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path, | 2618 | di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path, |
| 2592 | dir_id, "default", 7, 1); | 2619 | dir_id, "default", 7, 1); |
| 2593 | if (IS_ERR_OR_NULL(di)) { | 2620 | if (IS_ERR_OR_NULL(di)) { |
| @@ -2603,7 +2630,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) | |||
| 2603 | btrfs_mark_buffer_dirty(path->nodes[0]); | 2630 | btrfs_mark_buffer_dirty(path->nodes[0]); |
| 2604 | btrfs_free_path(path); | 2631 | btrfs_free_path(path); |
| 2605 | 2632 | ||
| 2606 | disk_super = &root->fs_info->super_copy; | 2633 | disk_super = root->fs_info->super_copy; |
| 2607 | features = btrfs_super_incompat_flags(disk_super); | 2634 | features = btrfs_super_incompat_flags(disk_super); |
| 2608 | if (!(features & BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)) { | 2635 | if (!(features & BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)) { |
| 2609 | features |= BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL; | 2636 | features |= BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL; |
| @@ -2864,6 +2891,144 @@ static long btrfs_ioctl_scrub_progress(struct btrfs_root *root, | |||
| 2864 | return ret; | 2891 | return ret; |
| 2865 | } | 2892 | } |
| 2866 | 2893 | ||
| 2894 | static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) | ||
| 2895 | { | ||
| 2896 | int ret = 0; | ||
| 2897 | int i; | ||
| 2898 | u64 rel_ptr; | ||
| 2899 | int size; | ||
| 2900 | struct btrfs_ioctl_ino_path_args *ipa = NULL; | ||
| 2901 | struct inode_fs_paths *ipath = NULL; | ||
| 2902 | struct btrfs_path *path; | ||
| 2903 | |||
| 2904 | if (!capable(CAP_SYS_ADMIN)) | ||
| 2905 | return -EPERM; | ||
| 2906 | |||
| 2907 | path = btrfs_alloc_path(); | ||
| 2908 | if (!path) { | ||
| 2909 | ret = -ENOMEM; | ||
| 2910 | goto out; | ||
| 2911 | } | ||
| 2912 | |||
| 2913 | ipa = memdup_user(arg, sizeof(*ipa)); | ||
| 2914 | if (IS_ERR(ipa)) { | ||
| 2915 | ret = PTR_ERR(ipa); | ||
| 2916 | ipa = NULL; | ||
| 2917 | goto out; | ||
| 2918 | } | ||
| 2919 | |||
| 2920 | size = min_t(u32, ipa->size, 4096); | ||
| 2921 | ipath = init_ipath(size, root, path); | ||
| 2922 | if (IS_ERR(ipath)) { | ||
| 2923 | ret = PTR_ERR(ipath); | ||
| 2924 | ipath = NULL; | ||
| 2925 | goto out; | ||
| 2926 | } | ||
| 2927 | |||
| 2928 | ret = paths_from_inode(ipa->inum, ipath); | ||
| 2929 | if (ret < 0) | ||
| 2930 | goto out; | ||
| 2931 | |||
| 2932 | for (i = 0; i < ipath->fspath->elem_cnt; ++i) { | ||
| 2933 | rel_ptr = ipath->fspath->val[i] - (u64)ipath->fspath->val; | ||
| 2934 | ipath->fspath->val[i] = rel_ptr; | ||
| 2935 | } | ||
| 2936 | |||
| 2937 | ret = copy_to_user((void *)ipa->fspath, (void *)ipath->fspath, size); | ||
| 2938 | if (ret) { | ||
| 2939 | ret = -EFAULT; | ||
| 2940 | goto out; | ||
| 2941 | } | ||
| 2942 | |||
| 2943 | out: | ||
| 2944 | btrfs_free_path(path); | ||
| 2945 | free_ipath(ipath); | ||
| 2946 | kfree(ipa); | ||
| 2947 | |||
| 2948 | return ret; | ||
| 2949 | } | ||
| 2950 | |||
| 2951 | static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx) | ||
| 2952 | { | ||
| 2953 | struct btrfs_data_container *inodes = ctx; | ||
| 2954 | const size_t c = 3 * sizeof(u64); | ||
| 2955 | |||
| 2956 | if (inodes->bytes_left >= c) { | ||
| 2957 | inodes->bytes_left -= c; | ||
| 2958 | inodes->val[inodes->elem_cnt] = inum; | ||
| 2959 | inodes->val[inodes->elem_cnt + 1] = offset; | ||
| 2960 | inodes->val[inodes->elem_cnt + 2] = root; | ||
| 2961 | inodes->elem_cnt += 3; | ||
| 2962 | } else { | ||
| 2963 | inodes->bytes_missing += c - inodes->bytes_left; | ||
| 2964 | inodes->bytes_left = 0; | ||
| 2965 | inodes->elem_missed += 3; | ||
| 2966 | } | ||
| 2967 | |||
| 2968 | return 0; | ||
| 2969 | } | ||
| 2970 | |||
| 2971 | static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, | ||
| 2972 | void __user *arg) | ||
| 2973 | { | ||
| 2974 | int ret = 0; | ||
| 2975 | int size; | ||
| 2976 | u64 extent_offset; | ||
| 2977 | struct btrfs_ioctl_logical_ino_args *loi; | ||
| 2978 | struct btrfs_data_container *inodes = NULL; | ||
| 2979 | struct btrfs_path *path = NULL; | ||
| 2980 | struct btrfs_key key; | ||
| 2981 | |||
| 2982 | if (!capable(CAP_SYS_ADMIN)) | ||
| 2983 | return -EPERM; | ||
| 2984 | |||
| 2985 | loi = memdup_user(arg, sizeof(*loi)); | ||
| 2986 | if (IS_ERR(loi)) { | ||
| 2987 | ret = PTR_ERR(loi); | ||
| 2988 | loi = NULL; | ||
| 2989 | goto out; | ||
| 2990 | } | ||
| 2991 | |||
| 2992 | path = btrfs_alloc_path(); | ||
| 2993 | if (!path) { | ||
| 2994 | ret = -ENOMEM; | ||
| 2995 | goto out; | ||
| 2996 | } | ||
| 2997 | |||
| 2998 | size = min_t(u32, loi->size, 4096); | ||
| 2999 | inodes = init_data_container(size); | ||
| 3000 | if (IS_ERR(inodes)) { | ||
| 3001 | ret = PTR_ERR(inodes); | ||
| 3002 | inodes = NULL; | ||
| 3003 | goto out; | ||
| 3004 | } | ||
| 3005 | |||
| 3006 | ret = extent_from_logical(root->fs_info, loi->logical, path, &key); | ||
| 3007 | |||
| 3008 | if (ret < 0) | ||
| 3009 | goto out; | ||
| 3010 | if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | ||
| 3011 | /* tree blocks have no inodes to list */ | ||
| 3012 | ret = -ENOENT; | ||
| 3013 | goto out; | ||
| 3014 | } | ||
| 3012 | |||
| 3013 | extent_offset = loi->logical - key.objectid; | ||
| 3014 | ret = iterate_extent_inodes(root->fs_info, path, key.objectid, | ||
| 3015 | extent_offset, build_ino_list, inodes); | ||
| 3016 | |||
| 3017 | if (ret < 0) | ||
| 3018 | goto out; | ||
| 3019 | |||
| 3020 | ret = copy_to_user((void __user *)(unsigned long)loi->inodes, | ||
| 3021 | inodes, size); | ||
| 3021 | if (ret) | ||
| 3022 | ret = -EFAULT; | ||
| 3023 | |||
| 3024 | out: | ||
| 3025 | btrfs_free_path(path); | ||
| 3026 | kfree(inodes); | ||
| 3027 | kfree(loi); | ||
| 3028 | |||
| 3029 | return ret; | ||
| 3030 | } | ||
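The inverse direction works the same way from userspace. As build_ino_list above shows, every hit occupies three consecutive u64 values, so the result is walked in strides of three. Again a hedged sketch with the same assumptions as before:

static int print_logical_owners(int fd, unsigned long long logical)
{
	struct btrfs_ioctl_logical_ino_args loi;
	struct btrfs_data_container *inodes;
	unsigned int i;
	int ret;

	inodes = calloc(1, 4096);
	if (!inodes)
		return -1;

	memset(&loi, 0, sizeof(loi));
	loi.logical = logical;		/* a byte address inside a data extent */
	loi.size = 4096;
	loi.inodes = (unsigned long)inodes;

	ret = ioctl(fd, BTRFS_IOC_LOGICAL_INO, &loi);
	if (ret == 0)
		for (i = 0; i < inodes->elem_cnt; i += 3)
			printf("inode %llu offset %llu root %llu\n",
			       (unsigned long long)inodes->val[i],
			       (unsigned long long)inodes->val[i + 1],
			       (unsigned long long)inodes->val[i + 2]);
	free(inodes);
	return ret;
}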
| 3031 | |||
| 2867 | long btrfs_ioctl(struct file *file, unsigned int | 3032 | long btrfs_ioctl(struct file *file, unsigned int |
| 2868 | cmd, unsigned long arg) | 3033 | cmd, unsigned long arg) |
| 2869 | { | 3034 | { |
| @@ -2921,6 +3086,10 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
| 2921 | return btrfs_ioctl_tree_search(file, argp); | 3086 | return btrfs_ioctl_tree_search(file, argp); |
| 2922 | case BTRFS_IOC_INO_LOOKUP: | 3087 | case BTRFS_IOC_INO_LOOKUP: |
| 2923 | return btrfs_ioctl_ino_lookup(file, argp); | 3088 | return btrfs_ioctl_ino_lookup(file, argp); |
| 3089 | case BTRFS_IOC_INO_PATHS: | ||
| 3090 | return btrfs_ioctl_ino_to_path(root, argp); | ||
| 3091 | case BTRFS_IOC_LOGICAL_INO: | ||
| 3092 | return btrfs_ioctl_logical_to_ino(root, argp); | ||
| 2924 | case BTRFS_IOC_SPACE_INFO: | 3093 | case BTRFS_IOC_SPACE_INFO: |
| 2925 | return btrfs_ioctl_space_info(root, argp); | 3094 | return btrfs_ioctl_space_info(root, argp); |
| 2926 | case BTRFS_IOC_SYNC: | 3095 | case BTRFS_IOC_SYNC: |
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index ad1ea789fcb4..252ae9915de8 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h | |||
| @@ -193,6 +193,30 @@ struct btrfs_ioctl_space_args { | |||
| 193 | struct btrfs_ioctl_space_info spaces[0]; | 193 | struct btrfs_ioctl_space_info spaces[0]; |
| 194 | }; | 194 | }; |
| 195 | 195 | ||
| 196 | struct btrfs_data_container { | ||
| 197 | __u32 bytes_left; /* out -- bytes of unused space left in the container */ | ||
| 198 | __u32 bytes_missing; /* out -- additional bytes needed for result */ | ||
| 199 | __u32 elem_cnt; /* out */ | ||
| 200 | __u32 elem_missed; /* out */ | ||
| 201 | __u64 val[0]; /* out */ | ||
| 202 | }; | ||
| 203 | |||
| 204 | struct btrfs_ioctl_ino_path_args { | ||
| 205 | __u64 inum; /* in */ | ||
| 206 | __u32 size; /* in */ | ||
| 207 | __u64 reserved[4]; | ||
| 208 | /* struct btrfs_data_container *fspath; out */ | ||
| 209 | __u64 fspath; /* out */ | ||
| 210 | }; | ||
| 211 | |||
| 212 | struct btrfs_ioctl_logical_ino_args { | ||
| 213 | __u64 logical; /* in */ | ||
| 214 | __u32 size; /* in */ | ||
| 215 | __u64 reserved[4]; | ||
| 216 | /* struct btrfs_data_container *inodes; out */ | ||
| 217 | __u64 inodes; /* out */ | ||
| 218 | }; | ||
| 219 | |||
| 196 | #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ | 220 | #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ |
| 197 | struct btrfs_ioctl_vol_args) | 221 | struct btrfs_ioctl_vol_args) |
| 198 | #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ | 222 | #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ |
| @@ -248,4 +272,9 @@ struct btrfs_ioctl_space_args { | |||
| 248 | struct btrfs_ioctl_dev_info_args) | 272 | struct btrfs_ioctl_dev_info_args) |
| 249 | #define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \ | 273 | #define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \ |
| 250 | struct btrfs_ioctl_fs_info_args) | 274 | struct btrfs_ioctl_fs_info_args) |
| 275 | #define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \ | ||
| 276 | struct btrfs_ioctl_ino_path_args) | ||
| 277 | #define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ | ||
| 278 | struct btrfs_ioctl_logical_ino_args) | ||
| 279 | |||
| 251 | #endif | 280 | #endif |
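Neither ioctl fails when the result does not fit: bytes_missing and elem_missed report what was dropped. Since the handlers clamp the container to 4096 bytes in this version, a caller can detect truncation but cannot yet avoid it with a larger buffer. A hedged check, continuing the sketches above:

	if (ret == 0 && fspath->elem_missed)
		fprintf(stderr, "truncated: %u elements missed, %u more bytes needed\n",
			(unsigned)fspath->elem_missed,
			(unsigned)fspath->bytes_missing);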
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index fb2605d998e9..f38e452486b8 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c | |||
| @@ -158,8 +158,7 @@ static void print_extent_ref_v0(struct extent_buffer *eb, int slot) | |||
| 158 | void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) | 158 | void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) |
| 159 | { | 159 | { |
| 160 | int i; | 160 | int i; |
| 161 | u32 type; | 161 | u32 type, nr; |
| 162 | u32 nr = btrfs_header_nritems(l); | ||
| 163 | struct btrfs_item *item; | 162 | struct btrfs_item *item; |
| 164 | struct btrfs_root_item *ri; | 163 | struct btrfs_root_item *ri; |
| 165 | struct btrfs_dir_item *di; | 164 | struct btrfs_dir_item *di; |
| @@ -172,6 +171,11 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) | |||
| 172 | struct btrfs_key key; | 171 | struct btrfs_key key; |
| 173 | struct btrfs_key found_key; | 172 | struct btrfs_key found_key; |
| 174 | 173 | ||
| 174 | if (!l) | ||
| 175 | return; | ||
| 176 | |||
| 177 | nr = btrfs_header_nritems(l); | ||
| 178 | |||
| 175 | printk(KERN_INFO "leaf %llu total ptrs %d free space %d\n", | 179 | printk(KERN_INFO "leaf %llu total ptrs %d free space %d\n", |
| 176 | (unsigned long long)btrfs_header_bytenr(l), nr, | 180 | (unsigned long long)btrfs_header_bytenr(l), nr, |
| 177 | btrfs_leaf_free_space(root, l)); | 181 | btrfs_leaf_free_space(root, l)); |
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c new file mode 100644 index 000000000000..2373b39a132b --- /dev/null +++ b/fs/btrfs/reada.c | |||
| @@ -0,0 +1,951 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2011 STRATO. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/sched.h> | ||
| 20 | #include <linux/pagemap.h> | ||
| 21 | #include <linux/writeback.h> | ||
| 22 | #include <linux/blkdev.h> | ||
| 23 | #include <linux/rbtree.h> | ||
| 24 | #include <linux/slab.h> | ||
| 25 | #include <linux/workqueue.h> | ||
| 26 | #include "ctree.h" | ||
| 27 | #include "volumes.h" | ||
| 28 | #include "disk-io.h" | ||
| 29 | #include "transaction.h" | ||
| 30 | |||
| 31 | #undef DEBUG | ||
| 32 | |||
| 33 | /* | ||
| 34 | * This is the implementation for the generic read ahead framework. | ||
| 35 | * | ||
| 36 | * To trigger a readahead, btrfs_reada_add must be called. It will start | ||
| 37 | * a read ahead for the given range [start, end) on tree root. The returned | ||
| 38 | * handle can either be used to wait on the readahead to finish | ||
| 39 | * (btrfs_reada_wait), or to send it to the background (btrfs_reada_detach). | ||
| 40 | * | ||
| 41 | * The read ahead works as follows: | ||
| 42 | * On btrfs_reada_add, the root of the tree is inserted into a radix_tree. | ||
| 43 | * reada_start_machine will then search for extents to prefetch and trigger | ||
| 44 | * some reads. When a read finishes for a node, all contained node/leaf | ||
| 45 | * pointers that lie in the given range will also be enqueued. The reads will | ||
| 46 | * be triggered in sequential order, thus giving a big win over a naive | ||
| 47 | * enumeration. It will also make use of multi-device layouts. Each disk | ||
| 48 | * will have its own read pointer and all disks will be utilized in parallel. | ||
| 49 | * Also, no two disks will read both sides of a mirror simultaneously, as this | ||
| 50 | * would waste seeking capacity. Instead both disks will read different parts | ||
| 51 | * of the filesystem. | ||
| 52 | * Any number of readaheads can be started in parallel. The read order will be | ||
| 53 | * determined globally, i.e. 2 parallel readaheads will normally finish faster | ||
| 54 | * than the 2 started one after another. | ||
| 55 | */ | ||
| 56 | |||
| 57 | #define MAX_MIRRORS 2 | ||
| 58 | #define MAX_IN_FLIGHT 6 | ||
| 59 | |||
| 60 | struct reada_extctl { | ||
| 61 | struct list_head list; | ||
| 62 | struct reada_control *rc; | ||
| 63 | u64 generation; | ||
| 64 | }; | ||
| 65 | |||
| 66 | struct reada_extent { | ||
| 67 | u64 logical; | ||
| 68 | struct btrfs_key top; | ||
| 69 | u32 blocksize; | ||
| 70 | int err; | ||
| 71 | struct list_head extctl; | ||
| 72 | struct kref refcnt; | ||
| 73 | spinlock_t lock; | ||
| 74 | struct reada_zone *zones[MAX_MIRRORS]; | ||
| 75 | int nzones; | ||
| 76 | struct btrfs_device *scheduled_for; | ||
| 77 | }; | ||
| 78 | |||
| 79 | struct reada_zone { | ||
| 80 | u64 start; | ||
| 81 | u64 end; | ||
| 82 | u64 elems; | ||
| 83 | struct list_head list; | ||
| 84 | spinlock_t lock; | ||
| 85 | int locked; | ||
| 86 | struct btrfs_device *device; | ||
| 87 | struct btrfs_device *devs[MAX_MIRRORS]; /* full list, incl self */ | ||
| 88 | int ndevs; | ||
| 89 | struct kref refcnt; | ||
| 90 | }; | ||
| 91 | |||
| 92 | struct reada_machine_work { | ||
| 93 | struct btrfs_work work; | ||
| 94 | struct btrfs_fs_info *fs_info; | ||
| 95 | }; | ||
| 96 | |||
| 97 | static void reada_extent_put(struct btrfs_fs_info *, struct reada_extent *); | ||
| 98 | static void reada_control_release(struct kref *kref); | ||
| 99 | static void reada_zone_release(struct kref *kref); | ||
| 100 | static void reada_start_machine(struct btrfs_fs_info *fs_info); | ||
| 101 | static void __reada_start_machine(struct btrfs_fs_info *fs_info); | ||
| 102 | |||
| 103 | static int reada_add_block(struct reada_control *rc, u64 logical, | ||
| 104 | struct btrfs_key *top, int level, u64 generation); | ||
| 105 | |||
| 106 | /* recurses */ | ||
| 107 | /* in case of err, eb might be NULL */ | ||
| 108 | static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, | ||
| 109 | u64 start, int err) | ||
| 110 | { | ||
| 111 | int level = 0; | ||
| 112 | int nritems; | ||
| 113 | int i; | ||
| 114 | u64 bytenr; | ||
| 115 | u64 generation; | ||
| 116 | struct reada_extent *re; | ||
| 117 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 118 | struct list_head list; | ||
| 119 | unsigned long index = start >> PAGE_CACHE_SHIFT; | ||
| 120 | struct btrfs_device *for_dev; | ||
| 121 | |||
| 122 | if (eb) | ||
| 123 | level = btrfs_header_level(eb); | ||
| 124 | |||
| 125 | /* find extent */ | ||
| 126 | spin_lock(&fs_info->reada_lock); | ||
| 127 | re = radix_tree_lookup(&fs_info->reada_tree, index); | ||
| 128 | if (re) | ||
| 129 | kref_get(&re->refcnt); | ||
| 130 | spin_unlock(&fs_info->reada_lock); | ||
| 131 | |||
| 132 | if (!re) | ||
| 133 | return -1; | ||
| 134 | |||
| 135 | spin_lock(&re->lock); | ||
| 136 | /* | ||
| 137 | * just take the full list from the extent. afterwards we | ||
| 138 | * don't need the lock anymore | ||
| 139 | */ | ||
| 140 | list_replace_init(&re->extctl, &list); | ||
| 141 | for_dev = re->scheduled_for; | ||
| 142 | re->scheduled_for = NULL; | ||
| 143 | spin_unlock(&re->lock); | ||
| 144 | |||
| 145 | if (err == 0) { | ||
| 146 | nritems = level ? btrfs_header_nritems(eb) : 0; | ||
| 147 | generation = btrfs_header_generation(eb); | ||
| 148 | /* | ||
| 149 | * FIXME: currently we just set nritems to 0 if this is a leaf, | ||
| 150 | * effectively ignoring the content. As a next step we could | ||
| 151 | * trigger more readahead depending on the content, e.g. | ||
| 152 | * fetch the checksums for the extents in the leaf. | ||
| 153 | */ | ||
| 154 | } else { | ||
| 155 | /* | ||
| 156 | * this is the error case, the extent buffer has not been | ||
| 157 | * read correctly. We won't access anything from it and | ||
| 158 | * just cleanup our data structures. Effectively this will | ||
| 159 | * cut the branch below this node from read ahead. | ||
| 160 | */ | ||
| 161 | nritems = 0; | ||
| 162 | generation = 0; | ||
| 163 | } | ||
| 164 | |||
| 165 | for (i = 0; i < nritems; i++) { | ||
| 166 | struct reada_extctl *rec; | ||
| 167 | u64 n_gen; | ||
| 168 | struct btrfs_key key; | ||
| 169 | struct btrfs_key next_key; | ||
| 170 | |||
| 171 | btrfs_node_key_to_cpu(eb, &key, i); | ||
| 172 | if (i + 1 < nritems) | ||
| 173 | btrfs_node_key_to_cpu(eb, &next_key, i + 1); | ||
| 174 | else | ||
| 175 | next_key = re->top; | ||
| 176 | bytenr = btrfs_node_blockptr(eb, i); | ||
| 177 | n_gen = btrfs_node_ptr_generation(eb, i); | ||
| 178 | |||
| 179 | list_for_each_entry(rec, &list, list) { | ||
| 180 | struct reada_control *rc = rec->rc; | ||
| 181 | |||
| 182 | /* | ||
| 183 | * if the generation doesn't match, just ignore this | ||
| 184 | * extctl. This will probably cut off a branch from | ||
| 185 | * prefetch. Alternatively one could start a new (sub-) | ||
| 186 | * prefetch for this branch, starting again from root. | ||
| 187 | * FIXME: move the generation check out of this loop | ||
| 188 | */ | ||
| 189 | #ifdef DEBUG | ||
| 190 | if (rec->generation != generation) { | ||
| 191 | printk(KERN_DEBUG "generation mismatch for " | ||
| 192 | "(%llu,%d,%llu) %llu != %llu\n", | ||
| 193 | key.objectid, key.type, key.offset, | ||
| 194 | rec->generation, generation); | ||
| 195 | } | ||
| 196 | #endif | ||
| 197 | if (rec->generation == generation && | ||
| 198 | btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 && | ||
| 199 | btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0) | ||
| 200 | reada_add_block(rc, bytenr, &next_key, | ||
| 201 | level - 1, n_gen); | ||
| 202 | } | ||
| 203 | } | ||
| 204 | /* | ||
| 205 | * free extctl records | ||
| 206 | */ | ||
| 207 | while (!list_empty(&list)) { | ||
| 208 | struct reada_control *rc; | ||
| 209 | struct reada_extctl *rec; | ||
| 210 | |||
| 211 | rec = list_first_entry(&list, struct reada_extctl, list); | ||
| 212 | list_del(&rec->list); | ||
| 213 | rc = rec->rc; | ||
| 214 | kfree(rec); | ||
| 215 | |||
| 216 | kref_get(&rc->refcnt); | ||
| 217 | if (atomic_dec_and_test(&rc->elems)) { | ||
| 218 | kref_put(&rc->refcnt, reada_control_release); | ||
| 219 | wake_up(&rc->wait); | ||
| 220 | } | ||
| 221 | kref_put(&rc->refcnt, reada_control_release); | ||
| 222 | |||
| 223 | reada_extent_put(fs_info, re); /* one ref for each entry */ | ||
| 224 | } | ||
| 225 | reada_extent_put(fs_info, re); /* our ref */ | ||
| 226 | if (for_dev) | ||
| 227 | atomic_dec(&for_dev->reada_in_flight); | ||
| 228 | |||
| 229 | return 0; | ||
| 230 | } | ||
| 231 | |||
| 232 | /* | ||
| 233 | * start is passed separately in case eb is NULL, which may be the case with | ||
| 234 | * failed I/O | ||
| 235 | */ | ||
| 236 | int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, | ||
| 237 | u64 start, int err) | ||
| 238 | { | ||
| 239 | int ret; | ||
| 240 | |||
| 241 | ret = __readahead_hook(root, eb, start, err); | ||
| 242 | |||
| 243 | reada_start_machine(root->fs_info); | ||
| 244 | |||
| 245 | return ret; | ||
| 246 | } | ||
| 247 | |||
| 248 | static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, | ||
| 249 | struct btrfs_device *dev, u64 logical, | ||
| 250 | struct btrfs_bio *bbio) | ||
| 251 | { | ||
| 252 | int ret; | ||
| 253 | int looped = 0; | ||
| 254 | struct reada_zone *zone; | ||
| 255 | struct btrfs_block_group_cache *cache = NULL; | ||
| 256 | u64 start; | ||
| 257 | u64 end; | ||
| 258 | int i; | ||
| 259 | |||
| 260 | again: | ||
| 261 | zone = NULL; | ||
| 262 | spin_lock(&fs_info->reada_lock); | ||
| 263 | ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, | ||
| 264 | logical >> PAGE_CACHE_SHIFT, 1); | ||
| 265 | if (ret == 1) | ||
| 266 | kref_get(&zone->refcnt); | ||
| 267 | spin_unlock(&fs_info->reada_lock); | ||
| 268 | |||
| 269 | if (ret == 1) { | ||
| 270 | if (logical >= zone->start && logical < zone->end) | ||
| 271 | return zone; | ||
| 272 | spin_lock(&fs_info->reada_lock); | ||
| 273 | kref_put(&zone->refcnt, reada_zone_release); | ||
| 274 | spin_unlock(&fs_info->reada_lock); | ||
| 275 | } | ||
| 276 | |||
| 277 | if (looped) | ||
| 278 | return NULL; | ||
| 279 | |||
| 280 | cache = btrfs_lookup_block_group(fs_info, logical); | ||
| 281 | if (!cache) | ||
| 282 | return NULL; | ||
| 283 | |||
| 284 | start = cache->key.objectid; | ||
| 285 | end = start + cache->key.offset - 1; | ||
| 286 | btrfs_put_block_group(cache); | ||
| 287 | |||
| 288 | zone = kzalloc(sizeof(*zone), GFP_NOFS); | ||
| 289 | if (!zone) | ||
| 290 | return NULL; | ||
| 291 | |||
| 292 | zone->start = start; | ||
| 293 | zone->end = end; | ||
| 294 | INIT_LIST_HEAD(&zone->list); | ||
| 295 | spin_lock_init(&zone->lock); | ||
| 296 | zone->locked = 0; | ||
| 297 | kref_init(&zone->refcnt); | ||
| 298 | zone->elems = 0; | ||
| 299 | zone->device = dev; /* our device always sits at index 0 */ | ||
| 300 | for (i = 0; i < bbio->num_stripes; ++i) { | ||
| 301 | /* bounds have already been checked */ | ||
| 302 | zone->devs[i] = bbio->stripes[i].dev; | ||
| 303 | } | ||
| 304 | zone->ndevs = bbio->num_stripes; | ||
| 305 | |||
| 306 | spin_lock(&fs_info->reada_lock); | ||
| 307 | ret = radix_tree_insert(&dev->reada_zones, | ||
| 308 | (unsigned long)zone->end >> PAGE_CACHE_SHIFT, | ||
| 309 | zone); | ||
| 310 | spin_unlock(&fs_info->reada_lock); | ||
| 311 | |||
| 312 | if (ret) { | ||
| 313 | kfree(zone); | ||
| 314 | looped = 1; | ||
| 315 | goto again; | ||
| 316 | } | ||
| 317 | |||
| 318 | return zone; | ||
| 319 | } | ||
| 320 | |||
| 321 | static struct reada_extent *reada_find_extent(struct btrfs_root *root, | ||
| 322 | u64 logical, | ||
| 323 | struct btrfs_key *top, int level) | ||
| 324 | { | ||
| 325 | int ret; | ||
| 326 | int looped = 0; | ||
| 327 | struct reada_extent *re = NULL; | ||
| 328 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 329 | struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; | ||
| 330 | struct btrfs_bio *bbio = NULL; | ||
| 331 | struct btrfs_device *dev; | ||
| 332 | u32 blocksize; | ||
| 333 | u64 length; | ||
| 334 | int nzones = 0; | ||
| 335 | int i; | ||
| 336 | unsigned long index = logical >> PAGE_CACHE_SHIFT; | ||
| 337 | |||
| 338 | again: | ||
| 339 | spin_lock(&fs_info->reada_lock); | ||
| 340 | re = radix_tree_lookup(&fs_info->reada_tree, index); | ||
| 341 | if (re) | ||
| 342 | kref_get(&re->refcnt); | ||
| 343 | spin_unlock(&fs_info->reada_lock); | ||
| 344 | |||
| 345 | if (re || looped) | ||
| 346 | return re; | ||
| 347 | |||
| 348 | re = kzalloc(sizeof(*re), GFP_NOFS); | ||
| 349 | if (!re) | ||
| 350 | return NULL; | ||
| 351 | |||
| 352 | blocksize = btrfs_level_size(root, level); | ||
| 353 | re->logical = logical; | ||
| 354 | re->blocksize = blocksize; | ||
| 355 | re->top = *top; | ||
| 356 | INIT_LIST_HEAD(&re->extctl); | ||
| 357 | spin_lock_init(&re->lock); | ||
| 358 | kref_init(&re->refcnt); | ||
| 359 | |||
| 360 | /* | ||
| 361 | * map block: REQ_WRITE makes btrfs_map_block return all mirrors of | ||
| 362 | * the extent instead of picking just one to read from | ||
| 363 | */ | ||
| 363 | length = blocksize; | ||
| 364 | ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length, &bbio, 0); | ||
| 365 | if (ret || !bbio || length < blocksize) | ||
| 366 | goto error; | ||
| 367 | |||
| 368 | if (bbio->num_stripes > MAX_MIRRORS) { | ||
| 369 | printk(KERN_ERR "btrfs readahead: more than %d copies not " | ||
| 370 | "supported", MAX_MIRRORS); | ||
| 371 | goto error; | ||
| 372 | } | ||
| 373 | |||
| 374 | for (nzones = 0; nzones < bbio->num_stripes; ++nzones) { | ||
| 375 | struct reada_zone *zone; | ||
| 376 | |||
| 377 | dev = bbio->stripes[nzones].dev; | ||
| 378 | zone = reada_find_zone(fs_info, dev, logical, bbio); | ||
| 379 | if (!zone) | ||
| 380 | break; | ||
| 381 | |||
| 382 | re->zones[nzones] = zone; | ||
| 383 | spin_lock(&zone->lock); | ||
| 384 | if (!zone->elems) | ||
| 385 | kref_get(&zone->refcnt); | ||
| 386 | ++zone->elems; | ||
| 387 | spin_unlock(&zone->lock); | ||
| 388 | spin_lock(&fs_info->reada_lock); | ||
| 389 | kref_put(&zone->refcnt, reada_zone_release); | ||
| 390 | spin_unlock(&fs_info->reada_lock); | ||
| 391 | } | ||
| 392 | re->nzones = nzones; | ||
| 393 | if (nzones == 0) { | ||
| 394 | /* not a single zone found, error and out */ | ||
| 395 | goto error; | ||
| 396 | } | ||
| 397 | |||
| 398 | /* insert extent in reada_tree + all per-device trees, all or nothing */ | ||
| 399 | spin_lock(&fs_info->reada_lock); | ||
| 400 | ret = radix_tree_insert(&fs_info->reada_tree, index, re); | ||
| 401 | if (ret) { | ||
| 402 | spin_unlock(&fs_info->reada_lock); | ||
| 403 | if (ret != -ENOMEM) { | ||
| 404 | /* someone inserted the extent in the meantime */ | ||
| 405 | looped = 1; | ||
| 406 | } | ||
| 407 | goto error; | ||
| 408 | } | ||
| 409 | for (i = 0; i < nzones; ++i) { | ||
| 410 | dev = bbio->stripes[i].dev; | ||
| 411 | ret = radix_tree_insert(&dev->reada_extents, index, re); | ||
| 412 | if (ret) { | ||
| 413 | while (--i >= 0) { | ||
| 414 | dev = bbio->stripes[i].dev; | ||
| 415 | BUG_ON(dev == NULL); | ||
| 416 | radix_tree_delete(&dev->reada_extents, index); | ||
| 417 | } | ||
| 418 | BUG_ON(fs_info == NULL); | ||
| 419 | radix_tree_delete(&fs_info->reada_tree, index); | ||
| 420 | spin_unlock(&fs_info->reada_lock); | ||
| 421 | goto error; | ||
| 422 | } | ||
| 423 | } | ||
| 424 | spin_unlock(&fs_info->reada_lock); | ||
| 425 | |||
| 426 | kfree(bbio); | ||
| 427 | return re; | ||
| 428 | |||
| 429 | error: | ||
| 430 | while (nzones) { | ||
| 431 | struct reada_zone *zone; | ||
| 432 | |||
| 433 | --nzones; | ||
| 434 | zone = re->zones[nzones]; | ||
| 435 | kref_get(&zone->refcnt); | ||
| 436 | spin_lock(&zone->lock); | ||
| 437 | --zone->elems; | ||
| 438 | if (zone->elems == 0) { | ||
| 439 | /* | ||
| 440 | * no fs_info->reada_lock needed, as this can't be | ||
| 441 | * the last ref | ||
| 442 | */ | ||
| 443 | kref_put(&zone->refcnt, reada_zone_release); | ||
| 444 | } | ||
| 445 | spin_unlock(&zone->lock); | ||
| 446 | |||
| 447 | spin_lock(&fs_info->reada_lock); | ||
| 448 | kref_put(&zone->refcnt, reada_zone_release); | ||
| 449 | spin_unlock(&fs_info->reada_lock); | ||
| 450 | } | ||
| 451 | kfree(bbio); | ||
| 452 | kfree(re); | ||
| 453 | if (looped) | ||
| 454 | goto again; | ||
| 455 | return NULL; | ||
| 456 | } | ||
| 457 | |||
| 458 | static void reada_kref_dummy(struct kref *kr) | ||
| 459 | { | ||
| 460 | } | ||
| 461 | |||
| 462 | static void reada_extent_put(struct btrfs_fs_info *fs_info, | ||
| 463 | struct reada_extent *re) | ||
| 464 | { | ||
| 465 | int i; | ||
| 466 | unsigned long index = re->logical >> PAGE_CACHE_SHIFT; | ||
| 467 | |||
| 468 | spin_lock(&fs_info->reada_lock); | ||
| 469 | if (!kref_put(&re->refcnt, reada_kref_dummy)) { | ||
| 470 | spin_unlock(&fs_info->reada_lock); | ||
| 471 | return; | ||
| 472 | } | ||
| 473 | |||
| 474 | radix_tree_delete(&fs_info->reada_tree, index); | ||
| 475 | for (i = 0; i < re->nzones; ++i) { | ||
| 476 | struct reada_zone *zone = re->zones[i]; | ||
| 477 | |||
| 478 | radix_tree_delete(&zone->device->reada_extents, index); | ||
| 479 | } | ||
| 480 | |||
| 481 | spin_unlock(&fs_info->reada_lock); | ||
| 482 | |||
| 483 | for (i = 0; i < re->nzones; ++i) { | ||
| 484 | struct reada_zone *zone = re->zones[i]; | ||
| 485 | |||
| 486 | kref_get(&zone->refcnt); | ||
| 487 | spin_lock(&zone->lock); | ||
| 488 | --zone->elems; | ||
| 489 | if (zone->elems == 0) { | ||
| 490 | /* no fs_info->reada_lock needed, as this can't be | ||
| 491 | * the last ref */ | ||
| 492 | kref_put(&zone->refcnt, reada_zone_release); | ||
| 493 | } | ||
| 494 | spin_unlock(&zone->lock); | ||
| 495 | |||
| 496 | spin_lock(&fs_info->reada_lock); | ||
| 497 | kref_put(&zone->refcnt, reada_zone_release); | ||
| 498 | spin_unlock(&fs_info->reada_lock); | ||
| 499 | } | ||
| 500 | if (re->scheduled_for) | ||
| 501 | atomic_dec(&re->scheduled_for->reada_in_flight); | ||
| 502 | |||
| 503 | kfree(re); | ||
| 504 | } | ||
| 505 | |||
| 506 | static void reada_zone_release(struct kref *kref) | ||
| 507 | { | ||
| 508 | struct reada_zone *zone = container_of(kref, struct reada_zone, refcnt); | ||
| 509 | |||
| 510 | radix_tree_delete(&zone->device->reada_zones, | ||
| 511 | zone->end >> PAGE_CACHE_SHIFT); | ||
| 512 | |||
| 513 | kfree(zone); | ||
| 514 | } | ||
| 515 | |||
| 516 | static void reada_control_release(struct kref *kref) | ||
| 517 | { | ||
| 518 | struct reada_control *rc = container_of(kref, struct reada_control, | ||
| 519 | refcnt); | ||
| 520 | |||
| 521 | kfree(rc); | ||
| 522 | } | ||
| 523 | |||
| 524 | static int reada_add_block(struct reada_control *rc, u64 logical, | ||
| 525 | struct btrfs_key *top, int level, u64 generation) | ||
| 526 | { | ||
| 527 | struct btrfs_root *root = rc->root; | ||
| 528 | struct reada_extent *re; | ||
| 529 | struct reada_extctl *rec; | ||
| 530 | |||
| 531 | re = reada_find_extent(root, logical, top, level); /* takes one ref */ | ||
| 532 | if (!re) | ||
| 533 | return -1; | ||
| 534 | |||
| 535 | rec = kzalloc(sizeof(*rec), GFP_NOFS); | ||
| 536 | if (!rec) { | ||
| 537 | reada_extent_put(root->fs_info, re); | ||
| 538 | return -1; | ||
| 539 | } | ||
| 540 | |||
| 541 | rec->rc = rc; | ||
| 542 | rec->generation = generation; | ||
| 543 | atomic_inc(&rc->elems); | ||
| 544 | |||
| 545 | spin_lock(&re->lock); | ||
| 546 | list_add_tail(&rec->list, &re->extctl); | ||
| 547 | spin_unlock(&re->lock); | ||
| 548 | |||
| 549 | /* leave the ref on the extent */ | ||
| 550 | |||
| 551 | return 0; | ||
| 552 | } | ||
| 553 | |||
| 554 | /* | ||
| 555 | * called with fs_info->reada_lock held | ||
| 556 | */ | ||
| 557 | static void reada_peer_zones_set_lock(struct reada_zone *zone, int lock) | ||
| 558 | { | ||
| 559 | int i; | ||
| 560 | unsigned long index = zone->end >> PAGE_CACHE_SHIFT; | ||
| 561 | |||
| 562 | for (i = 0; i < zone->ndevs; ++i) { | ||
| 563 | struct reada_zone *peer; | ||
| 564 | peer = radix_tree_lookup(&zone->devs[i]->reada_zones, index); | ||
| 565 | if (peer && peer->device != zone->device) | ||
| 566 | peer->locked = lock; | ||
| 567 | } | ||
| 568 | } | ||
| 569 | |||
| 570 | /* | ||
| 571 | * called with fs_info->reada_lock held | ||
| 572 | */ | ||
| 573 | static int reada_pick_zone(struct btrfs_device *dev) | ||
| 574 | { | ||
| 575 | struct reada_zone *top_zone = NULL; | ||
| 576 | struct reada_zone *top_locked_zone = NULL; | ||
| 577 | u64 top_elems = 0; | ||
| 578 | u64 top_locked_elems = 0; | ||
| 579 | unsigned long index = 0; | ||
| 580 | int ret; | ||
| 581 | |||
| 582 | if (dev->reada_curr_zone) { | ||
| 583 | reada_peer_zones_set_lock(dev->reada_curr_zone, 0); | ||
| 584 | kref_put(&dev->reada_curr_zone->refcnt, reada_zone_release); | ||
| 585 | dev->reada_curr_zone = NULL; | ||
| 586 | } | ||
| 587 | /* pick the zone with the most elements */ | ||
| 588 | while (1) { | ||
| 589 | struct reada_zone *zone; | ||
| 590 | |||
| 591 | ret = radix_tree_gang_lookup(&dev->reada_zones, | ||
| 592 | (void **)&zone, index, 1); | ||
| 593 | if (ret == 0) | ||
| 594 | break; | ||
| 595 | index = (zone->end >> PAGE_CACHE_SHIFT) + 1; | ||
| 596 | if (zone->locked) { | ||
| 597 | if (zone->elems > top_locked_elems) { | ||
| 598 | top_locked_elems = zone->elems; | ||
| 599 | top_locked_zone = zone; | ||
| 600 | } | ||
| 601 | } else { | ||
| 602 | if (zone->elems > top_elems) { | ||
| 603 | top_elems = zone->elems; | ||
| 604 | top_zone = zone; | ||
| 605 | } | ||
| 606 | } | ||
| 607 | } | ||
| 608 | if (top_zone) | ||
| 609 | dev->reada_curr_zone = top_zone; | ||
| 610 | else if (top_locked_zone) | ||
| 611 | dev->reada_curr_zone = top_locked_zone; | ||
| 612 | else | ||
| 613 | return 0; | ||
| 614 | |||
| 615 | dev->reada_next = dev->reada_curr_zone->start; | ||
| 616 | kref_get(&dev->reada_curr_zone->refcnt); | ||
| 617 | reada_peer_zones_set_lock(dev->reada_curr_zone, 1); | ||
| 618 | |||
| 619 | return 1; | ||
| 620 | } | ||
| 621 | |||
| 622 | static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, | ||
| 623 | struct btrfs_device *dev) | ||
| 624 | { | ||
| 625 | struct reada_extent *re = NULL; | ||
| 626 | int mirror_num = 0; | ||
| 627 | struct extent_buffer *eb = NULL; | ||
| 628 | u64 logical; | ||
| 629 | u32 blocksize; | ||
| 630 | int ret; | ||
| 631 | int i; | ||
| 632 | int need_kick = 0; | ||
| 633 | |||
| 634 | spin_lock(&fs_info->reada_lock); | ||
| 635 | if (dev->reada_curr_zone == NULL) { | ||
| 636 | ret = reada_pick_zone(dev); | ||
| 637 | if (!ret) { | ||
| 638 | spin_unlock(&fs_info->reada_lock); | ||
| 639 | return 0; | ||
| 640 | } | ||
| 641 | } | ||
| 642 | /* | ||
| 643 | * FIXME currently we issue the reads one extent at a time. If we have | ||
| 644 | * a contiguous block of extents, we could also coalesce them or use | ||
| 645 | * plugging to speed things up | ||
| 646 | */ | ||
| 647 | ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re, | ||
| 648 | dev->reada_next >> PAGE_CACHE_SHIFT, 1); | ||
| 649 | if (ret == 0 || re->logical >= dev->reada_curr_zone->end) { | ||
| 650 | ret = reada_pick_zone(dev); | ||
| 651 | if (!ret) { | ||
| 652 | spin_unlock(&fs_info->reada_lock); | ||
| 653 | return 0; | ||
| 654 | } | ||
| 655 | re = NULL; | ||
| 656 | ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re, | ||
| 657 | dev->reada_next >> PAGE_CACHE_SHIFT, 1); | ||
| 658 | } | ||
| 659 | if (ret == 0) { | ||
| 660 | spin_unlock(&fs_info->reada_lock); | ||
| 661 | return 0; | ||
| 662 | } | ||
| 663 | dev->reada_next = re->logical + re->blocksize; | ||
| 664 | kref_get(&re->refcnt); | ||
| 665 | |||
| 666 | spin_unlock(&fs_info->reada_lock); | ||
| 667 | |||
| 668 | /* | ||
| 669 | * find mirror num | ||
| 670 | */ | ||
| 671 | for (i = 0; i < re->nzones; ++i) { | ||
| 672 | if (re->zones[i]->device == dev) { | ||
| 673 | mirror_num = i + 1; | ||
| 674 | break; | ||
| 675 | } | ||
| 676 | } | ||
| 677 | logical = re->logical; | ||
| 678 | blocksize = re->blocksize; | ||
| 679 | |||
| 680 | spin_lock(&re->lock); | ||
| 681 | if (re->scheduled_for == NULL) { | ||
| 682 | re->scheduled_for = dev; | ||
| 683 | need_kick = 1; | ||
| 684 | } | ||
| 685 | spin_unlock(&re->lock); | ||
| 686 | |||
| 687 | reada_extent_put(fs_info, re); | ||
| 688 | |||
| 689 | if (!need_kick) | ||
| 690 | return 0; | ||
| 691 | |||
| 692 | atomic_inc(&dev->reada_in_flight); | ||
| 693 | ret = reada_tree_block_flagged(fs_info->extent_root, logical, blocksize, | ||
| 694 | mirror_num, &eb); | ||
| 695 | if (ret) | ||
| 696 | __readahead_hook(fs_info->extent_root, NULL, logical, ret); | ||
| 697 | else if (eb) | ||
| 698 | __readahead_hook(fs_info->extent_root, eb, eb->start, ret); | ||
| 699 | |||
| 700 | if (eb) | ||
| 701 | free_extent_buffer(eb); | ||
| 702 | |||
| 703 | return 1; | ||
| 704 | |||
| 705 | } | ||
| 706 | |||
| 707 | static void reada_start_machine_worker(struct btrfs_work *work) | ||
| 708 | { | ||
| 709 | struct reada_machine_work *rmw; | ||
| 710 | struct btrfs_fs_info *fs_info; | ||
| 711 | |||
| 712 | rmw = container_of(work, struct reada_machine_work, work); | ||
| 713 | fs_info = rmw->fs_info; | ||
| 714 | |||
| 715 | kfree(rmw); | ||
| 716 | |||
| 717 | __reada_start_machine(fs_info); | ||
| 718 | } | ||
| 719 | |||
| 720 | static void __reada_start_machine(struct btrfs_fs_info *fs_info) | ||
| 721 | { | ||
| 722 | struct btrfs_device *device; | ||
| 723 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | ||
| 724 | u64 enqueued; | ||
| 725 | u64 total = 0; | ||
| 726 | int i; | ||
| 727 | |||
| 728 | do { | ||
| 729 | enqueued = 0; | ||
| 730 | list_for_each_entry(device, &fs_devices->devices, dev_list) { | ||
| 731 | if (atomic_read(&device->reada_in_flight) < | ||
| 732 | MAX_IN_FLIGHT) | ||
| 733 | enqueued += reada_start_machine_dev(fs_info, | ||
| 734 | device); | ||
| 735 | } | ||
| 736 | total += enqueued; | ||
| 737 | } while (enqueued && total < 10000); | ||
| 738 | |||
| 739 | if (enqueued == 0) | ||
| 740 | return; | ||
| 741 | |||
| 742 | /* | ||
| 743 | * If everything is already in the cache, this is effectively single | ||
| 744 | * threaded. To a) not hold the caller for too long and b) to utilize | ||
| 745 | * more cores, we break out of the loop above after 10000 iterations and | ||
| 746 | * enqueue the rest to workers to finish it. This will distribute the load to | ||
| 747 | * the cores. | ||
| 748 | */ | ||
| 749 | for (i = 0; i < 2; ++i) | ||
| 750 | reada_start_machine(fs_info); | ||
| 751 | } | ||
| 752 | |||
| 753 | static void reada_start_machine(struct btrfs_fs_info *fs_info) | ||
| 754 | { | ||
| 755 | struct reada_machine_work *rmw; | ||
| 756 | |||
| 757 | rmw = kzalloc(sizeof(*rmw), GFP_NOFS); | ||
| 758 | if (!rmw) { | ||
| 759 | /* FIXME we cannot handle this properly right now */ | ||
| 760 | BUG(); | ||
| 761 | } | ||
| 762 | rmw->work.func = reada_start_machine_worker; | ||
| 763 | rmw->fs_info = fs_info; | ||
| 764 | |||
| 765 | btrfs_queue_worker(&fs_info->readahead_workers, &rmw->work); | ||
| 766 | } | ||
| 767 | |||
| 768 | #ifdef DEBUG | ||
| 769 | static void dump_devs(struct btrfs_fs_info *fs_info, int all) | ||
| 770 | { | ||
| 771 | struct btrfs_device *device; | ||
| 772 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | ||
| 773 | unsigned long index; | ||
| 774 | int ret; | ||
| 775 | int i; | ||
| 776 | int j; | ||
| 777 | int cnt; | ||
| 778 | |||
| 779 | spin_lock(&fs_info->reada_lock); | ||
| 780 | list_for_each_entry(device, &fs_devices->devices, dev_list) { | ||
| 781 | printk(KERN_DEBUG "dev %lld has %d in flight\n", device->devid, | ||
| 782 | atomic_read(&device->reada_in_flight)); | ||
| 783 | index = 0; | ||
| 784 | while (1) { | ||
| 785 | struct reada_zone *zone; | ||
| 786 | ret = radix_tree_gang_lookup(&device->reada_zones, | ||
| 787 | (void **)&zone, index, 1); | ||
| 788 | if (ret == 0) | ||
| 789 | break; | ||
| 790 | printk(KERN_DEBUG " zone %llu-%llu elems %llu locked " | ||
| 791 | "%d devs", zone->start, zone->end, zone->elems, | ||
| 792 | zone->locked); | ||
| 793 | for (j = 0; j < zone->ndevs; ++j) { | ||
| 794 | printk(KERN_CONT " %lld", | ||
| 795 | zone->devs[j]->devid); | ||
| 796 | } | ||
| 797 | if (device->reada_curr_zone == zone) | ||
| 798 | printk(KERN_CONT " curr off %llu", | ||
| 799 | device->reada_next - zone->start); | ||
| 800 | printk(KERN_CONT "\n"); | ||
| 801 | index = (zone->end >> PAGE_CACHE_SHIFT) + 1; | ||
| 802 | } | ||
| 803 | cnt = 0; | ||
| 804 | index = 0; | ||
| 805 | while (all) { | ||
| 806 | struct reada_extent *re = NULL; | ||
| 807 | |||
| 808 | ret = radix_tree_gang_lookup(&device->reada_extents, | ||
| 809 | (void **)&re, index, 1); | ||
| 810 | if (ret == 0) | ||
| 811 | break; | ||
| 812 | printk(KERN_DEBUG | ||
| 813 | " re: logical %llu size %u empty %d for %lld", | ||
| 814 | re->logical, re->blocksize, | ||
| 815 | list_empty(&re->extctl), re->scheduled_for ? | ||
| 816 | re->scheduled_for->devid : -1); | ||
| 817 | |||
| 818 | for (i = 0; i < re->nzones; ++i) { | ||
| 819 | printk(KERN_CONT " zone %llu-%llu devs", | ||
| 820 | re->zones[i]->start, | ||
| 821 | re->zones[i]->end); | ||
| 822 | for (j = 0; j < re->zones[i]->ndevs; ++j) { | ||
| 823 | printk(KERN_CONT " %lld", | ||
| 824 | re->zones[i]->devs[j]->devid); | ||
| 825 | } | ||
| 826 | } | ||
| 827 | printk(KERN_CONT "\n"); | ||
| 828 | index = (re->logical >> PAGE_CACHE_SHIFT) + 1; | ||
| 829 | if (++cnt > 15) | ||
| 830 | break; | ||
| 831 | } | ||
| 832 | } | ||
| 833 | |||
| 834 | index = 0; | ||
| 835 | cnt = 0; | ||
| 836 | while (all) { | ||
| 837 | struct reada_extent *re = NULL; | ||
| 838 | |||
| 839 | ret = radix_tree_gang_lookup(&fs_info->reada_tree, (void **)&re, | ||
| 840 | index, 1); | ||
| 841 | if (ret == 0) | ||
| 842 | break; | ||
| 843 | if (!re->scheduled_for) { | ||
| 844 | index = (re->logical >> PAGE_CACHE_SHIFT) + 1; | ||
| 845 | continue; | ||
| 846 | } | ||
| 847 | printk(KERN_DEBUG | ||
| 848 | "re: logical %llu size %u list empty %d for %lld", | ||
| 849 | re->logical, re->blocksize, list_empty(&re->extctl), | ||
| 850 | re->scheduled_for ? re->scheduled_for->devid : -1); | ||
| 851 | for (i = 0; i < re->nzones; ++i) { | ||
| 852 | printk(KERN_CONT " zone %llu-%llu devs", | ||
| 853 | re->zones[i]->start, | ||
| 854 | re->zones[i]->end); | ||
| 855 | for (j = 0; j < re->zones[i]->ndevs; ++j) { | ||
| 856 | printk(KERN_CONT " %lld", | ||
| 857 | re->zones[i]->devs[j]->devid); | ||
| 858 | } | ||
| 859 | } | ||
| 865 | printk(KERN_CONT "\n"); | ||
| 866 | index = (re->logical >> PAGE_CACHE_SHIFT) + 1; | ||
| 867 | } | ||
| 868 | spin_unlock(&fs_info->reada_lock); | ||
| 869 | } | ||
| 870 | #endif | ||
| 871 | |||
| 872 | /* | ||
| 873 | * interface | ||
| 874 | */ | ||
| 875 | struct reada_control *btrfs_reada_add(struct btrfs_root *root, | ||
| 876 | struct btrfs_key *key_start, struct btrfs_key *key_end) | ||
| 877 | { | ||
| 878 | struct reada_control *rc; | ||
| 879 | u64 start; | ||
| 880 | u64 generation; | ||
| 881 | int level; | ||
| 882 | struct extent_buffer *node; | ||
| 883 | static struct btrfs_key max_key = { | ||
| 884 | .objectid = (u64)-1, | ||
| 885 | .type = (u8)-1, | ||
| 886 | .offset = (u64)-1 | ||
| 887 | }; | ||
| 888 | |||
| 889 | rc = kzalloc(sizeof(*rc), GFP_NOFS); | ||
| 890 | if (!rc) | ||
| 891 | return ERR_PTR(-ENOMEM); | ||
| 892 | |||
| 893 | rc->root = root; | ||
| 894 | rc->key_start = *key_start; | ||
| 895 | rc->key_end = *key_end; | ||
| 896 | atomic_set(&rc->elems, 0); | ||
| 897 | init_waitqueue_head(&rc->wait); | ||
| 898 | kref_init(&rc->refcnt); | ||
| 899 | kref_get(&rc->refcnt); /* one ref for having elements */ | ||
| 900 | |||
| 901 | node = btrfs_root_node(root); | ||
| 902 | start = node->start; | ||
| 903 | level = btrfs_header_level(node); | ||
| 904 | generation = btrfs_header_generation(node); | ||
| 905 | free_extent_buffer(node); | ||
| 906 | |||
| 907 | reada_add_block(rc, start, &max_key, level, generation); | ||
| 908 | |||
| 909 | reada_start_machine(root->fs_info); | ||
| 910 | |||
| 911 | return rc; | ||
| 912 | } | ||
| 913 | |||
| 914 | #ifdef DEBUG | ||
| 915 | int btrfs_reada_wait(void *handle) | ||
| 916 | { | ||
| 917 | struct reada_control *rc = handle; | ||
| 918 | |||
| 919 | while (atomic_read(&rc->elems)) { | ||
| 920 | wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0, | ||
| 921 | 5 * HZ); | ||
| 922 | dump_devs(rc->root->fs_info, atomic_read(&rc->elems) < 10 ? 1 : 0); | ||
| 923 | } | ||
| 924 | |||
| 925 | dump_devs(rc->root->fs_info, atomic_read(&rc->elems) < 10 ? 1 : 0); | ||
| 926 | |||
| 927 | kref_put(&rc->refcnt, reada_control_release); | ||
| 928 | |||
| 929 | return 0; | ||
| 930 | } | ||
| 931 | #else | ||
| 932 | int btrfs_reada_wait(void *handle) | ||
| 933 | { | ||
| 934 | struct reada_control *rc = handle; | ||
| 935 | |||
| 936 | while (atomic_read(&rc->elems)) { | ||
| 937 | wait_event(rc->wait, atomic_read(&rc->elems) == 0); | ||
| 938 | } | ||
| 939 | |||
| 940 | kref_put(&rc->refcnt, reada_control_release); | ||
| 941 | |||
| 942 | return 0; | ||
| 943 | } | ||
| 944 | #endif | ||
| 945 | |||
| 946 | void btrfs_reada_detach(void *handle) | ||
| 947 | { | ||
| 948 | struct reada_control *rc = handle; | ||
| 949 | |||
| 950 | kref_put(&rc->refcnt, reada_control_release); | ||
| 951 | } | ||
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 59bb1764273d..24d654ce7a06 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
| @@ -2041,8 +2041,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
| 2041 | BUG_ON(IS_ERR(trans)); | 2041 | BUG_ON(IS_ERR(trans)); |
| 2042 | trans->block_rsv = rc->block_rsv; | 2042 | trans->block_rsv = rc->block_rsv; |
| 2043 | 2043 | ||
| 2044 | ret = btrfs_block_rsv_check(trans, root, rc->block_rsv, | 2044 | ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved); |
| 2045 | min_reserved, 0); | ||
| 2046 | if (ret) { | 2045 | if (ret) { |
| 2047 | BUG_ON(ret != -EAGAIN); | 2046 | BUG_ON(ret != -EAGAIN); |
| 2048 | ret = btrfs_commit_transaction(trans, root); | 2047 | ret = btrfs_commit_transaction(trans, root); |
| @@ -2152,8 +2151,7 @@ int prepare_to_merge(struct reloc_control *rc, int err) | |||
| 2152 | again: | 2151 | again: |
| 2153 | if (!err) { | 2152 | if (!err) { |
| 2154 | num_bytes = rc->merging_rsv_size; | 2153 | num_bytes = rc->merging_rsv_size; |
| 2155 | ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv, | 2154 | ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes); |
| 2156 | num_bytes); | ||
| 2157 | if (ret) | 2155 | if (ret) |
| 2158 | err = ret; | 2156 | err = ret; |
| 2159 | } | 2157 | } |
| @@ -2427,7 +2425,7 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans, | |||
| 2427 | num_bytes = calcu_metadata_size(rc, node, 1) * 2; | 2425 | num_bytes = calcu_metadata_size(rc, node, 1) * 2; |
| 2428 | 2426 | ||
| 2429 | trans->block_rsv = rc->block_rsv; | 2427 | trans->block_rsv = rc->block_rsv; |
| 2430 | ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes); | 2428 | ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes); |
| 2431 | if (ret) { | 2429 | if (ret) { |
| 2432 | if (ret == -EAGAIN) | 2430 | if (ret == -EAGAIN) |
| 2433 | rc->commit_transaction = 1; | 2431 | rc->commit_transaction = 1; |
| @@ -2922,6 +2920,7 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
| 2922 | unsigned long last_index; | 2920 | unsigned long last_index; |
| 2923 | struct page *page; | 2921 | struct page *page; |
| 2924 | struct file_ra_state *ra; | 2922 | struct file_ra_state *ra; |
| 2923 | gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); | ||
| 2925 | int nr = 0; | 2924 | int nr = 0; |
| 2926 | int ret = 0; | 2925 | int ret = 0; |
| 2927 | 2926 | ||
| @@ -2956,7 +2955,7 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
| 2956 | ra, NULL, index, | 2955 | ra, NULL, index, |
| 2957 | last_index + 1 - index); | 2956 | last_index + 1 - index); |
| 2958 | page = find_or_create_page(inode->i_mapping, index, | 2957 | page = find_or_create_page(inode->i_mapping, index, |
| 2959 | GFP_NOFS); | 2958 | mask); |
| 2960 | if (!page) { | 2959 | if (!page) { |
| 2961 | btrfs_delalloc_release_metadata(inode, | 2960 | btrfs_delalloc_release_metadata(inode, |
| 2962 | PAGE_CACHE_SIZE); | 2961 | PAGE_CACHE_SIZE); |
| @@ -3323,8 +3322,11 @@ static int find_data_references(struct reloc_control *rc, | |||
| 3323 | } | 3322 | } |
| 3324 | 3323 | ||
| 3325 | key.objectid = ref_objectid; | 3324 | key.objectid = ref_objectid; |
| 3326 | key.offset = ref_offset; | ||
| 3327 | key.type = BTRFS_EXTENT_DATA_KEY; | 3325 | key.type = BTRFS_EXTENT_DATA_KEY; |
| 3326 | if (ref_offset > ((u64)-1 << 32)) | ||
| 3327 | key.offset = 0; | ||
| 3328 | else | ||
| 3329 | key.offset = ref_offset; | ||
| 3328 | 3330 | ||
| 3329 | path->search_commit_root = 1; | 3331 | path->search_commit_root = 1; |
| 3330 | path->skip_locking = 1; | 3332 | path->skip_locking = 1; |
| @@ -3645,14 +3647,11 @@ int prepare_to_relocate(struct reloc_control *rc) | |||
| 3645 | * btrfs_init_reloc_root will use them when there | 3647 | * btrfs_init_reloc_root will use them when there |
| 3646 | * is no reservation in transaction handle. | 3648 | * is no reservation in transaction handle. |
| 3647 | */ | 3649 | */ |
| 3648 | ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv, | 3650 | ret = btrfs_block_rsv_add(rc->extent_root, rc->block_rsv, |
| 3649 | rc->extent_root->nodesize * 256); | 3651 | rc->extent_root->nodesize * 256); |
| 3650 | if (ret) | 3652 | if (ret) |
| 3651 | return ret; | 3653 | return ret; |
| 3652 | 3654 | ||
| 3653 | rc->block_rsv->refill_used = 1; | ||
| 3654 | btrfs_add_durable_block_rsv(rc->extent_root->fs_info, rc->block_rsv); | ||
| 3655 | |||
| 3656 | memset(&rc->cluster, 0, sizeof(rc->cluster)); | 3655 | memset(&rc->cluster, 0, sizeof(rc->cluster)); |
| 3657 | rc->search_start = rc->block_group->key.objectid; | 3656 | rc->search_start = rc->block_group->key.objectid; |
| 3658 | rc->extents_found = 0; | 3657 | rc->extents_found = 0; |
| @@ -3777,8 +3776,7 @@ restart: | |||
| 3777 | } | 3776 | } |
| 3778 | } | 3777 | } |
| 3779 | 3778 | ||
| 3780 | ret = btrfs_block_rsv_check(trans, rc->extent_root, | 3779 | ret = btrfs_block_rsv_check(rc->extent_root, rc->block_rsv, 5); |
| 3781 | rc->block_rsv, 0, 5); | ||
| 3782 | if (ret < 0) { | 3780 | if (ret < 0) { |
| 3783 | if (ret != -EAGAIN) { | 3781 | if (ret != -EAGAIN) { |
| 3784 | err = ret; | 3782 | err = ret; |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index a8d03d5efb5d..ed11d3866afd 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
| @@ -17,10 +17,14 @@ | |||
| 17 | */ | 17 | */ |
| 18 | 18 | ||
| 19 | #include <linux/blkdev.h> | 19 | #include <linux/blkdev.h> |
| 20 | #include <linux/ratelimit.h> | ||
| 20 | #include "ctree.h" | 21 | #include "ctree.h" |
| 21 | #include "volumes.h" | 22 | #include "volumes.h" |
| 22 | #include "disk-io.h" | 23 | #include "disk-io.h" |
| 23 | #include "ordered-data.h" | 24 | #include "ordered-data.h" |
| 25 | #include "transaction.h" | ||
| 26 | #include "backref.h" | ||
| 27 | #include "extent_io.h" | ||
| 24 | 28 | ||
| 25 | /* | 29 | /* |
| 26 | * This is only the first step towards a full-features scrub. It reads all | 30 | * This is only the first step towards a full-features scrub. It reads all |
| @@ -29,15 +33,12 @@ | |||
| 29 | * any can be found. | 33 | * any can be found. |
| 30 | * | 34 | * |
| 31 | * Future enhancements: | 35 | * Future enhancements: |
| 32 | * - To enhance the performance, better read-ahead strategies for the | ||
| 33 | * extent-tree can be employed. | ||
| 34 | * - In case an unrepairable extent is encountered, track which files are | 36 | * - In case an unrepairable extent is encountered, track which files are |
| 35 | * affected and report them | 37 | * affected and report them |
| 36 | * - In case of a read error on files with nodatasum, map the file and read | 38 | * - In case of a read error on files with nodatasum, map the file and read |
| 37 | * the extent to trigger a writeback of the good copy | 39 | * the extent to trigger a writeback of the good copy |
| 38 | * - track and record media errors, throw out bad devices | 40 | * - track and record media errors, throw out bad devices |
| 39 | * - add a mode to also read unallocated space | 41 | * - add a mode to also read unallocated space |
| 40 | * - make the prefetch cancellable | ||
| 41 | */ | 42 | */ |
| 42 | 43 | ||
| 43 | struct scrub_bio; | 44 | struct scrub_bio; |
| @@ -63,7 +64,7 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix); | |||
| 63 | struct scrub_page { | 64 | struct scrub_page { |
| 64 | u64 flags; /* extent flags */ | 65 | u64 flags; /* extent flags */ |
| 65 | u64 generation; | 66 | u64 generation; |
| 66 | u64 mirror_num; | 67 | int mirror_num; |
| 67 | int have_csum; | 68 | int have_csum; |
| 68 | u8 csum[BTRFS_CSUM_SIZE]; | 69 | u8 csum[BTRFS_CSUM_SIZE]; |
| 69 | }; | 70 | }; |
| @@ -87,6 +88,7 @@ struct scrub_dev { | |||
| 87 | int first_free; | 88 | int first_free; |
| 88 | int curr; | 89 | int curr; |
| 89 | atomic_t in_flight; | 90 | atomic_t in_flight; |
| 91 | atomic_t fixup_cnt; | ||
| 90 | spinlock_t list_lock; | 92 | spinlock_t list_lock; |
| 91 | wait_queue_head_t list_wait; | 93 | wait_queue_head_t list_wait; |
| 92 | u16 csum_size; | 94 | u16 csum_size; |
| @@ -100,6 +102,27 @@ struct scrub_dev { | |||
| 100 | spinlock_t stat_lock; | 102 | spinlock_t stat_lock; |
| 101 | }; | 103 | }; |
| 102 | 104 | ||
| 105 | struct scrub_fixup_nodatasum { | ||
| 106 | struct scrub_dev *sdev; | ||
| 107 | u64 logical; | ||
| 108 | struct btrfs_root *root; | ||
| 109 | struct btrfs_work work; | ||
| 110 | int mirror_num; | ||
| 111 | }; | ||
| 112 | |||
| 113 | struct scrub_warning { | ||
| 114 | struct btrfs_path *path; | ||
| 115 | u64 extent_item_size; | ||
| 116 | char *scratch_buf; | ||
| 117 | char *msg_buf; | ||
| 118 | const char *errstr; | ||
| 119 | sector_t sector; | ||
| 120 | u64 logical; | ||
| 121 | struct btrfs_device *dev; | ||
| 122 | int msg_bufsize; | ||
| 123 | int scratch_bufsize; | ||
| 124 | }; | ||
| 125 | |||
| 103 | static void scrub_free_csums(struct scrub_dev *sdev) | 126 | static void scrub_free_csums(struct scrub_dev *sdev) |
| 104 | { | 127 | { |
| 105 | while (!list_empty(&sdev->csum_list)) { | 128 | while (!list_empty(&sdev->csum_list)) { |
| @@ -175,14 +198,15 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev) | |||
| 175 | 198 | ||
| 176 | if (i != SCRUB_BIOS_PER_DEV-1) | 199 | if (i != SCRUB_BIOS_PER_DEV-1) |
| 177 | sdev->bios[i]->next_free = i + 1; | 200 | sdev->bios[i]->next_free = i + 1; |
| 178 | else | 201 | else |
| 179 | sdev->bios[i]->next_free = -1; | 202 | sdev->bios[i]->next_free = -1; |
| 180 | } | 203 | } |
| 181 | sdev->first_free = 0; | 204 | sdev->first_free = 0; |
| 182 | sdev->curr = -1; | 205 | sdev->curr = -1; |
| 183 | atomic_set(&sdev->in_flight, 0); | 206 | atomic_set(&sdev->in_flight, 0); |
| 207 | atomic_set(&sdev->fixup_cnt, 0); | ||
| 184 | atomic_set(&sdev->cancel_req, 0); | 208 | atomic_set(&sdev->cancel_req, 0); |
| 185 | sdev->csum_size = btrfs_super_csum_size(&fs_info->super_copy); | 209 | sdev->csum_size = btrfs_super_csum_size(fs_info->super_copy); |
| 186 | INIT_LIST_HEAD(&sdev->csum_list); | 210 | INIT_LIST_HEAD(&sdev->csum_list); |
| 187 | 211 | ||
| 188 | spin_lock_init(&sdev->list_lock); | 212 | spin_lock_init(&sdev->list_lock); |
| @@ -195,24 +219,361 @@ nomem: | |||
| 195 | return ERR_PTR(-ENOMEM); | 219 | return ERR_PTR(-ENOMEM); |
| 196 | } | 220 | } |
| 197 | 221 | ||
| 222 | static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx) | ||
| 223 | { | ||
| 224 | u64 isize; | ||
| 225 | u32 nlink; | ||
| 226 | int ret; | ||
| 227 | int i; | ||
| 228 | struct extent_buffer *eb; | ||
| 229 | struct btrfs_inode_item *inode_item; | ||
| 230 | struct scrub_warning *swarn = ctx; | ||
| 231 | struct btrfs_fs_info *fs_info = swarn->dev->dev_root->fs_info; | ||
| 232 | struct inode_fs_paths *ipath = NULL; | ||
| 233 | struct btrfs_root *local_root; | ||
| 234 | struct btrfs_key root_key; | ||
| 235 | |||
| 236 | root_key.objectid = root; | ||
| 237 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
| 238 | root_key.offset = (u64)-1; | ||
| 239 | local_root = btrfs_read_fs_root_no_name(fs_info, &root_key); | ||
| 240 | if (IS_ERR(local_root)) { | ||
| 241 | ret = PTR_ERR(local_root); | ||
| 242 | goto err; | ||
| 243 | } | ||
| 244 | |||
| 245 | ret = inode_item_info(inum, 0, local_root, swarn->path); | ||
| 246 | if (ret) { | ||
| 247 | btrfs_release_path(swarn->path); | ||
| 248 | goto err; | ||
| 249 | } | ||
| 250 | |||
| 251 | eb = swarn->path->nodes[0]; | ||
| 252 | inode_item = btrfs_item_ptr(eb, swarn->path->slots[0], | ||
| 253 | struct btrfs_inode_item); | ||
| 254 | isize = btrfs_inode_size(eb, inode_item); | ||
| 255 | nlink = btrfs_inode_nlink(eb, inode_item); | ||
| 256 | btrfs_release_path(swarn->path); | ||
| 257 | |||
| 258 | ipath = init_ipath(4096, local_root, swarn->path); | ||
| 259 | if (IS_ERR(ipath)) { | ||
| 260 | ret = PTR_ERR(ipath); | ||
| 261 | ipath = NULL; | ||
| 262 | goto err; | ||
| 263 | } | ||
| 264 | ret = paths_from_inode(inum, ipath); | ||
| 265 | if (ret < 0) | ||
| 266 | goto err; | ||
| 263 | |||
| 264 | /* | ||
| 265 | * we deliberately ignore the fact that ipath might have been too | ||
| 266 | * small to hold all of the paths here | ||
| 267 | */ | ||
| 268 | for (i = 0; i < ipath->fspath->elem_cnt; ++i) | ||
| 269 | printk(KERN_WARNING "btrfs: %s at logical %llu on dev " | ||
| 270 | "%s, sector %llu, root %llu, inode %llu, offset %llu, " | ||
| 271 | "length %llu, links %u (path: %s)\n", swarn->errstr, | ||
| 272 | swarn->logical, swarn->dev->name, | ||
| 273 | (unsigned long long)swarn->sector, root, inum, offset, | ||
| 274 | min(isize - offset, (u64)PAGE_SIZE), nlink, | ||
| 275 | (char *)ipath->fspath->val[i]); | ||
| 276 | |||
| 277 | free_ipath(ipath); | ||
| 278 | return 0; | ||
| 279 | |||
| 280 | err: | ||
| 281 | printk(KERN_WARNING "btrfs: %s at logical %llu on dev " | ||
| 282 | "%s, sector %llu, root %llu, inode %llu, offset %llu: path " | ||
| 283 | "resolving failed with ret=%d\n", swarn->errstr, | ||
| 284 | swarn->logical, swarn->dev->name, | ||
| 285 | (unsigned long long)swarn->sector, root, inum, offset, ret); | ||
| 286 | |||
| 287 | free_ipath(ipath); | ||
| 288 | return 0; | ||
| 289 | } | ||
| 290 | |||
| 291 | static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio, | ||
| 292 | int ix) | ||
| 293 | { | ||
| 294 | struct btrfs_device *dev = sbio->sdev->dev; | ||
| 295 | struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; | ||
| 296 | struct btrfs_path *path; | ||
| 297 | struct btrfs_key found_key; | ||
| 298 | struct extent_buffer *eb; | ||
| 299 | struct btrfs_extent_item *ei; | ||
| 300 | struct scrub_warning swarn; | ||
| 301 | u32 item_size; | ||
| 302 | int ret; | ||
| 303 | u64 ref_root; | ||
| 304 | u8 ref_level; | ||
| 305 | unsigned long ptr = 0; | ||
| 306 | const int bufsize = 4096; | ||
| 307 | u64 extent_offset; | ||
| 308 | |||
| 309 | path = btrfs_alloc_path(); | ||
| 310 | |||
| 311 | swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS); | ||
| 312 | swarn.msg_buf = kmalloc(bufsize, GFP_NOFS); | ||
| 313 | swarn.sector = (sbio->physical + ix * PAGE_SIZE) >> 9; | ||
| 314 | swarn.logical = sbio->logical + ix * PAGE_SIZE; | ||
| 315 | swarn.errstr = errstr; | ||
| 316 | swarn.dev = dev; | ||
| 317 | swarn.msg_bufsize = bufsize; | ||
| 318 | swarn.scratch_bufsize = bufsize; | ||
| 319 | |||
| 320 | if (!path || !swarn.scratch_buf || !swarn.msg_buf) | ||
| 321 | goto out; | ||
| 322 | |||
| 323 | ret = extent_from_logical(fs_info, swarn.logical, path, &found_key); | ||
| 324 | if (ret < 0) | ||
| 325 | goto out; | ||
| 326 | |||
| 327 | extent_offset = swarn.logical - found_key.objectid; | ||
| 328 | swarn.extent_item_size = found_key.offset; | ||
| 329 | |||
| 330 | eb = path->nodes[0]; | ||
| 331 | ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); | ||
| 332 | item_size = btrfs_item_size_nr(eb, path->slots[0]); | ||
| 333 | |||
| 334 | if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | ||
| 335 | do { | ||
| 336 | ret = tree_backref_for_extent(&ptr, eb, ei, item_size, | ||
| 337 | &ref_root, &ref_level); | ||
| 338 | printk(KERN_WARNING "%s at logical %llu on dev %s, " | ||
| 339 | "sector %llu: metadata %s (level %d) in tree " | ||
| 340 | "%llu\n", errstr, swarn.logical, dev->name, | ||
| 341 | (unsigned long long)swarn.sector, | ||
| 342 | ref_level ? "node" : "leaf", | ||
| 343 | ret < 0 ? -1 : ref_level, | ||
| 344 | ret < 0 ? -1 : ref_root); | ||
| 345 | } while (ret != 1); | ||
| 346 | } else { | ||
| 347 | swarn.path = path; | ||
| 348 | iterate_extent_inodes(fs_info, path, found_key.objectid, | ||
| 349 | extent_offset, | ||
| 350 | scrub_print_warning_inode, &swarn); | ||
| 351 | } | ||
| 352 | |||
| 353 | out: | ||
| 354 | btrfs_free_path(path); | ||
| 355 | kfree(swarn.scratch_buf); | ||
| 356 | kfree(swarn.msg_buf); | ||
| 357 | } | ||
| 358 | |||
| 359 | static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *ctx) | ||
| 360 | { | ||
| 361 | struct page *page = NULL; | ||
| 362 | unsigned long index; | ||
| 363 | struct scrub_fixup_nodatasum *fixup = ctx; | ||
| 364 | int ret; | ||
| 365 | int corrected = 0; | ||
| 366 | struct btrfs_key key; | ||
| 367 | struct inode *inode = NULL; | ||
| 368 | u64 end = offset + PAGE_SIZE - 1; | ||
| 369 | struct btrfs_root *local_root; | ||
| 370 | |||
| 371 | key.objectid = root; | ||
| 372 | key.type = BTRFS_ROOT_ITEM_KEY; | ||
| 373 | key.offset = (u64)-1; | ||
| 374 | local_root = btrfs_read_fs_root_no_name(fixup->root->fs_info, &key); | ||
| 375 | if (IS_ERR(local_root)) | ||
| 376 | return PTR_ERR(local_root); | ||
| 377 | |||
| 378 | key.type = BTRFS_INODE_ITEM_KEY; | ||
| 379 | key.objectid = inum; | ||
| 380 | key.offset = 0; | ||
| 381 | inode = btrfs_iget(fixup->root->fs_info->sb, &key, local_root, NULL); | ||
| 382 | if (IS_ERR(inode)) | ||
| 383 | return PTR_ERR(inode); | ||
| 384 | |||
| 385 | index = offset >> PAGE_CACHE_SHIFT; | ||
| 386 | |||
| 387 | page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); | ||
| 388 | if (!page) { | ||
| 389 | ret = -ENOMEM; | ||
| 390 | goto out; | ||
| 391 | } | ||
| 392 | |||
| 393 | if (PageUptodate(page)) { | ||
| 394 | struct btrfs_mapping_tree *map_tree; | ||
| 395 | if (PageDirty(page)) { | ||
| 396 | /* | ||
| 397 | * we need to write the data to the defective sector. the | ||
| 398 | * data that was in that sector is not in memory, | ||
| 399 | * because the page was modified. we must not write the | ||
| 400 | * modified page to that sector. | ||
| 401 | * | ||
| 402 | * TODO: what could be done here: wait for the delalloc | ||
| 403 | * runner to write out that page (might involve | ||
| 404 | * COW) and see whether the sector is still | ||
| 405 | * referenced afterwards. | ||
| 406 | * | ||
| 407 | * For the time being, we'll treat this error as | ||
| 408 | * uncorrectable, although there is a chance that a | ||
| 409 | * later scrub will find the bad sector again and that | ||
| 410 | * there's no dirty page in memory by then. | ||
| 411 | */ | ||
| 412 | ret = -EIO; | ||
| 413 | goto out; | ||
| 414 | } | ||
| 415 | map_tree = &BTRFS_I(inode)->root->fs_info->mapping_tree; | ||
| 416 | ret = repair_io_failure(map_tree, offset, PAGE_SIZE, | ||
| 417 | fixup->logical, page, | ||
| 418 | fixup->mirror_num); | ||
| 419 | unlock_page(page); | ||
| 420 | corrected = !ret; | ||
| 421 | } else { | ||
| 422 | /* | ||
| 423 | * we need to get good data first. the general readpage path | ||
| 424 | * will call repair_io_failure for us, we just have to make | ||
| 425 | * sure we read the bad mirror. | ||
| 426 | */ | ||
| 427 | ret = set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end, | ||
| 428 | EXTENT_DAMAGED, GFP_NOFS); | ||
| 429 | if (ret) { | ||
| 430 | /* set_extent_bits should give proper error */ | ||
| 431 | WARN_ON(ret > 0); | ||
| 432 | if (ret > 0) | ||
| 433 | ret = -EFAULT; | ||
| 434 | goto out; | ||
| 435 | } | ||
| 436 | |||
| 437 | ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page, | ||
| 438 | btrfs_get_extent, | ||
| 439 | fixup->mirror_num); | ||
| 440 | wait_on_page_locked(page); | ||
| 441 | |||
| 442 | corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset, | ||
| 443 | end, EXTENT_DAMAGED, 0, NULL); | ||
| 444 | if (!corrected) | ||
| 445 | clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end, | ||
| 446 | EXTENT_DAMAGED, GFP_NOFS); | ||
| 447 | } | ||
| 448 | |||
| 449 | out: | ||
| 450 | if (page) | ||
| 451 | put_page(page); | ||
| 452 | if (inode) | ||
| 453 | iput(inode); | ||
| 454 | |||
| 455 | if (ret < 0) | ||
| 456 | return ret; | ||
| 457 | |||
| 458 | if (ret == 0 && corrected) { | ||
| 459 | /* | ||
| 460 | * we only need to call readpage for one of the inodes belonging | ||
| 461 | * to this extent. so make iterate_extent_inodes stop | ||
| 462 | */ | ||
| 463 | return 1; | ||
| 464 | } | ||
| 465 | |||
| 466 | return -EIO; | ||
| 467 | } | ||
| 468 | |||
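The function above drives repair through a small io_tree handshake: EXTENT_DAMAGED is set on the range, the known-bad mirror is read through the regular readpage path (which repairs the sector and clears the bit on success), and the caller infers the outcome from whether the bit survived. A condensed sketch of just that core, with the inode/page setup and error handling elided:

    /* sketch only: the read-verify core of scrub_fixup_readpage() */
    set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
                    EXTENT_DAMAGED, GFP_NOFS);

    /* read the known-bad mirror; on failure the generic readpage
     * path rewrites the sector from a good copy and clears
     * EXTENT_DAMAGED for the range */
    extent_read_full_page(&BTRFS_I(inode)->io_tree, page,
                          btrfs_get_extent, fixup->mirror_num);
    wait_on_page_locked(page);

    /* if the bit survived, the repair did not happen */
    corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset, end,
                                EXTENT_DAMAGED, 0, NULL);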
| 469 | static void scrub_fixup_nodatasum(struct btrfs_work *work) | ||
| 470 | { | ||
| 471 | int ret; | ||
| 472 | struct scrub_fixup_nodatasum *fixup; | ||
| 473 | struct scrub_dev *sdev; | ||
| 474 | struct btrfs_trans_handle *trans = NULL; | ||
| 475 | struct btrfs_fs_info *fs_info; | ||
| 476 | struct btrfs_path *path; | ||
| 477 | int uncorrectable = 0; | ||
| 478 | |||
| 479 | fixup = container_of(work, struct scrub_fixup_nodatasum, work); | ||
| 480 | sdev = fixup->sdev; | ||
| 481 | fs_info = fixup->root->fs_info; | ||
| 482 | |||
| 483 | path = btrfs_alloc_path(); | ||
| 484 | if (!path) { | ||
| 485 | spin_lock(&sdev->stat_lock); | ||
| 486 | ++sdev->stat.malloc_errors; | ||
| 487 | spin_unlock(&sdev->stat_lock); | ||
| 488 | uncorrectable = 1; | ||
| 489 | goto out; | ||
| 490 | } | ||
| 491 | |||
| 492 | trans = btrfs_join_transaction(fixup->root); | ||
| 493 | if (IS_ERR(trans)) { | ||
| 494 | uncorrectable = 1; | ||
| 495 | goto out; | ||
| 496 | } | ||
| 497 | |||
| 498 | /* | ||
| 499 | * the idea is to trigger a regular read through the standard path. we | ||
| 500 | * read a page from the (failed) logical address by specifying the | ||
| 501 | * corresponding mirror number of the failed sector. thus, that readpage is | ||
| 502 | * expected to fail. | ||
| 503 | * that is the point where on-the-fly error correction will kick in | ||
| 504 | * (once it's finished) and rewrite the failed sector if a good copy | ||
| 505 | * can be found. | ||
| 506 | */ | ||
| 507 | ret = iterate_inodes_from_logical(fixup->logical, fixup->root->fs_info, | ||
| 508 | path, scrub_fixup_readpage, | ||
| 509 | fixup); | ||
| 510 | if (ret < 0) { | ||
| 511 | uncorrectable = 1; | ||
| 512 | goto out; | ||
| 513 | } | ||
| 514 | WARN_ON(ret != 1); | ||
| 515 | |||
| 516 | spin_lock(&sdev->stat_lock); | ||
| 517 | ++sdev->stat.corrected_errors; | ||
| 518 | spin_unlock(&sdev->stat_lock); | ||
| 519 | |||
| 520 | out: | ||
| 521 | if (trans && !IS_ERR(trans)) | ||
| 522 | btrfs_end_transaction(trans, fixup->root); | ||
| 523 | if (uncorrectable) { | ||
| 524 | spin_lock(&sdev->stat_lock); | ||
| 525 | ++sdev->stat.uncorrectable_errors; | ||
| 526 | spin_unlock(&sdev->stat_lock); | ||
| 527 | printk_ratelimited(KERN_ERR "btrfs: unable to fixup " | ||
| 528 | "(nodatasum) error at logical %llu\n", | ||
| 529 | fixup->logical); | ||
| 530 | } | ||
| 531 | |||
| 532 | btrfs_free_path(path); | ||
| 533 | kfree(fixup); | ||
| 534 | |||
| 535 | /* see the caller for why we pretend to be paused in the scrub counters */ | ||
| 536 | mutex_lock(&fs_info->scrub_lock); | ||
| 537 | atomic_dec(&fs_info->scrubs_running); | ||
| 538 | atomic_dec(&fs_info->scrubs_paused); | ||
| 539 | mutex_unlock(&fs_info->scrub_lock); | ||
| 540 | atomic_dec(&sdev->fixup_cnt); | ||
| 541 | wake_up(&fs_info->scrub_pause_wait); | ||
| 542 | wake_up(&sdev->list_wait); | ||
| 543 | } | ||
| 544 | |||
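The counter choreography here is deliberately symmetric with the submit side in scrub_fixup() below: the same three counters are taken before the work is queued and dropped once it completes. Side by side, with everything but the accounting stripped out:

    /* submit side, in scrub_fixup() */
    mutex_lock(&fs_info->scrub_lock);
    atomic_inc(&fs_info->scrubs_running);  /* keeps cancel from completing */
    atomic_inc(&fs_info->scrubs_paused);   /* pause requests see us as paused */
    mutex_unlock(&fs_info->scrub_lock);
    atomic_inc(&sdev->fixup_cnt);
    btrfs_queue_worker(&fs_info->scrub_workers, &fixup->work);

    /* completion side, end of scrub_fixup_nodatasum() above */
    mutex_lock(&fs_info->scrub_lock);
    atomic_dec(&fs_info->scrubs_running);
    atomic_dec(&fs_info->scrubs_paused);
    mutex_unlock(&fs_info->scrub_lock);
    atomic_dec(&sdev->fixup_cnt);          /* btrfs_scrub_dev() waits for 0 */
    wake_up(&fs_info->scrub_pause_wait);
    wake_up(&sdev->list_wait);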
| 198 | /* | 545 | /* |
| 199 | * scrub_recheck_error gets called when either verification of the page | 546 | * scrub_recheck_error gets called when either verification of the page |
| 200 | * failed or the bio failed to read, e.g. with EIO. In the latter case, | 547 | * failed or the bio failed to read, e.g. with EIO. In the latter case, |
| 201 | * recheck_error gets called for every page in the bio, even though only | 548 | * recheck_error gets called for every page in the bio, even though only |
| 202 | * one may be bad | 549 | * one may be bad |
| 203 | */ | 550 | */ |
| 204 | static void scrub_recheck_error(struct scrub_bio *sbio, int ix) | 551 | static int scrub_recheck_error(struct scrub_bio *sbio, int ix) |
| 205 | { | 552 | { |
| 553 | struct scrub_dev *sdev = sbio->sdev; | ||
| 554 | u64 sector = (sbio->physical + ix * PAGE_SIZE) >> 9; | ||
| 555 | static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, | ||
| 556 | DEFAULT_RATELIMIT_BURST); | ||
| 557 | |||
| 206 | if (sbio->err) { | 558 | if (sbio->err) { |
| 207 | if (scrub_fixup_io(READ, sbio->sdev->dev->bdev, | 559 | if (scrub_fixup_io(READ, sbio->sdev->dev->bdev, sector, |
| 208 | (sbio->physical + ix * PAGE_SIZE) >> 9, | ||
| 209 | sbio->bio->bi_io_vec[ix].bv_page) == 0) { | 560 | sbio->bio->bi_io_vec[ix].bv_page) == 0) { |
| 210 | if (scrub_fixup_check(sbio, ix) == 0) | 561 | if (scrub_fixup_check(sbio, ix) == 0) |
| 211 | return; | 562 | return 0; |
| 212 | } | 563 | } |
| 564 | if (__ratelimit(&_rs)) | ||
| 565 | scrub_print_warning("i/o error", sbio, ix); | ||
| 566 | } else { | ||
| 567 | if (__ratelimit(&_rs)) | ||
| 568 | scrub_print_warning("checksum error", sbio, ix); | ||
| 213 | } | 569 | } |
| 214 | 570 | ||
| 571 | spin_lock(&sdev->stat_lock); | ||
| 572 | ++sdev->stat.read_errors; | ||
| 573 | spin_unlock(&sdev->stat_lock); | ||
| 574 | |||
| 215 | scrub_fixup(sbio, ix); | 575 | scrub_fixup(sbio, ix); |
| 576 | return 1; | ||
| 216 | } | 577 | } |
| 217 | 578 | ||
| 218 | static int scrub_fixup_check(struct scrub_bio *sbio, int ix) | 579 | static int scrub_fixup_check(struct scrub_bio *sbio, int ix) |
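The switch from void to int gives scrub_recheck_error() a simple contract: return 0 when the re-read plus checksum check shows the page was fine after all, and 1 when a real error was counted and handed to scrub_fixup(). Callers OR the result across all pages of a bio, so a bio-level failure that no individual page confirms gets booked as merely unverified; roughly:

    /* caller-side sketch, see scrub_checksum() below */
    ret = 0;
    for (i = 0; i < sbio->count; ++i)
            ret |= scrub_recheck_error(sbio, i);
    if (!ret) {
            /* nothing actually bad was found on recheck */
            spin_lock(&sdev->stat_lock);
            ++sdev->stat.unverified_errors;
            spin_unlock(&sdev->stat_lock);
    }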
| @@ -250,7 +611,8 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix) | |||
| 250 | struct scrub_dev *sdev = sbio->sdev; | 611 | struct scrub_dev *sdev = sbio->sdev; |
| 251 | struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; | 612 | struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; |
| 252 | struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; | 613 | struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; |
| 253 | struct btrfs_multi_bio *multi = NULL; | 614 | struct btrfs_bio *bbio = NULL; |
| 615 | struct scrub_fixup_nodatasum *fixup; | ||
| 254 | u64 logical = sbio->logical + ix * PAGE_SIZE; | 616 | u64 logical = sbio->logical + ix * PAGE_SIZE; |
| 255 | u64 length; | 617 | u64 length; |
| 256 | int i; | 618 | int i; |
| @@ -259,38 +621,57 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix) | |||
| 259 | 621 | ||
| 260 | if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) && | 622 | if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) && |
| 261 | (sbio->spag[ix].have_csum == 0)) { | 623 | (sbio->spag[ix].have_csum == 0)) { |
| 624 | fixup = kzalloc(sizeof(*fixup), GFP_NOFS); | ||
| 625 | if (!fixup) | ||
| 626 | goto uncorrectable; | ||
| 627 | fixup->sdev = sdev; | ||
| 628 | fixup->logical = logical; | ||
| 629 | fixup->root = fs_info->extent_root; | ||
| 630 | fixup->mirror_num = sbio->spag[ix].mirror_num; | ||
| 262 | /* | 631 | /* |
| 263 | * nodatasum, don't try to fix anything | 632 | * increment scrubs_running to prevent cancel requests from |
| 264 | * FIXME: we can do better, open the inode and trigger a | 633 | * completing as long as a fixup worker is running. we must also |
| 265 | * writeback | 634 | * increment scrubs_paused to prevent deadlocking on pause |
| 635 | * requests used for transaction commits (as the worker uses a | ||
| 636 | * transaction context). it is safe to regard the fixup worker | ||
| 637 | * as paused for all practical matters. effectively, we only | ||
| 638 | * avoid cancellation requests from completing. | ||
| 266 | */ | 639 | */ |
| 267 | goto uncorrectable; | 640 | mutex_lock(&fs_info->scrub_lock); |
| 641 | atomic_inc(&fs_info->scrubs_running); | ||
| 642 | atomic_inc(&fs_info->scrubs_paused); | ||
| 643 | mutex_unlock(&fs_info->scrub_lock); | ||
| 644 | atomic_inc(&sdev->fixup_cnt); | ||
| 645 | fixup->work.func = scrub_fixup_nodatasum; | ||
| 646 | btrfs_queue_worker(&fs_info->scrub_workers, &fixup->work); | ||
| 647 | return; | ||
| 268 | } | 648 | } |
| 269 | 649 | ||
| 270 | length = PAGE_SIZE; | 650 | length = PAGE_SIZE; |
| 271 | ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length, | 651 | ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length, |
| 272 | &multi, 0); | 652 | &bbio, 0); |
| 273 | if (ret || !multi || length < PAGE_SIZE) { | 653 | if (ret || !bbio || length < PAGE_SIZE) { |
| 274 | printk(KERN_ERR | 654 | printk(KERN_ERR |
| 275 | "scrub_fixup: btrfs_map_block failed us for %llu\n", | 655 | "scrub_fixup: btrfs_map_block failed us for %llu\n", |
| 276 | (unsigned long long)logical); | 656 | (unsigned long long)logical); |
| 277 | WARN_ON(1); | 657 | WARN_ON(1); |
| 658 | kfree(bbio); | ||
| 278 | return; | 659 | return; |
| 279 | } | 660 | } |
| 280 | 661 | ||
| 281 | if (multi->num_stripes == 1) | 662 | if (bbio->num_stripes == 1) |
| 282 | /* there aren't any replicas */ | 663 | /* there aren't any replicas */ |
| 283 | goto uncorrectable; | 664 | goto uncorrectable; |
| 284 | 665 | ||
| 285 | /* | 666 | /* |
| 286 | * first find a good copy | 667 | * first find a good copy |
| 287 | */ | 668 | */ |
| 288 | for (i = 0; i < multi->num_stripes; ++i) { | 669 | for (i = 0; i < bbio->num_stripes; ++i) { |
| 289 | if (i == sbio->spag[ix].mirror_num) | 670 | if (i + 1 == sbio->spag[ix].mirror_num) |
| 290 | continue; | 671 | continue; |
| 291 | 672 | ||
| 292 | if (scrub_fixup_io(READ, multi->stripes[i].dev->bdev, | 673 | if (scrub_fixup_io(READ, bbio->stripes[i].dev->bdev, |
| 293 | multi->stripes[i].physical >> 9, | 674 | bbio->stripes[i].physical >> 9, |
| 294 | sbio->bio->bi_io_vec[ix].bv_page)) { | 675 | sbio->bio->bi_io_vec[ix].bv_page)) { |
| 295 | /* I/O-error, this is not a good copy */ | 676 | /* I/O-error, this is not a good copy */ |
| 296 | continue; | 677 | continue; |
| @@ -299,7 +680,7 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix) | |||
| 299 | if (scrub_fixup_check(sbio, ix) == 0) | 680 | if (scrub_fixup_check(sbio, ix) == 0) |
| 300 | break; | 681 | break; |
| 301 | } | 682 | } |
| 302 | if (i == multi->num_stripes) | 683 | if (i == bbio->num_stripes) |
| 303 | goto uncorrectable; | 684 | goto uncorrectable; |
| 304 | 685 | ||
| 305 | if (!sdev->readonly) { | 686 | if (!sdev->readonly) { |
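One quiet convention change in this hunk: mirror_num is now 1-based, with 0 left to mean "no preference", so stripe i of a btrfs_bio corresponds to mirror i + 1 and the good-copy search skips i + 1 == mirror_num rather than i == mirror_num. A hypothetical helper, not part of the patch, just to spell out the mapping:

    static int stripe_index_for_mirror(int mirror_num)
    {
            /* valid mirrors are 1..num_stripes, stored at
             * stripes[0..num_stripes - 1]; 0 means any mirror */
            return mirror_num - 1;
    }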
| @@ -314,25 +695,23 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix) | |||
| 314 | } | 695 | } |
| 315 | } | 696 | } |
| 316 | 697 | ||
| 317 | kfree(multi); | 698 | kfree(bbio); |
| 318 | spin_lock(&sdev->stat_lock); | 699 | spin_lock(&sdev->stat_lock); |
| 319 | ++sdev->stat.corrected_errors; | 700 | ++sdev->stat.corrected_errors; |
| 320 | spin_unlock(&sdev->stat_lock); | 701 | spin_unlock(&sdev->stat_lock); |
| 321 | 702 | ||
| 322 | if (printk_ratelimit()) | 703 | printk_ratelimited(KERN_ERR "btrfs: fixed up error at logical %llu\n", |
| 323 | printk(KERN_ERR "btrfs: fixed up at %llu\n", | 704 | (unsigned long long)logical); |
| 324 | (unsigned long long)logical); | ||
| 325 | return; | 705 | return; |
| 326 | 706 | ||
| 327 | uncorrectable: | 707 | uncorrectable: |
| 328 | kfree(multi); | 708 | kfree(bbio); |
| 329 | spin_lock(&sdev->stat_lock); | 709 | spin_lock(&sdev->stat_lock); |
| 330 | ++sdev->stat.uncorrectable_errors; | 710 | ++sdev->stat.uncorrectable_errors; |
| 331 | spin_unlock(&sdev->stat_lock); | 711 | spin_unlock(&sdev->stat_lock); |
| 332 | 712 | ||
| 333 | if (printk_ratelimit()) | 713 | printk_ratelimited(KERN_ERR "btrfs: unable to fixup (regular) error at " |
| 334 | printk(KERN_ERR "btrfs: unable to fixup at %llu\n", | 714 | "logical %llu\n", (unsigned long long)logical); |
| 335 | (unsigned long long)logical); | ||
| 336 | } | 715 | } |
| 337 | 716 | ||
| 338 | static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector, | 717 | static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector, |
| @@ -382,8 +761,14 @@ static void scrub_checksum(struct btrfs_work *work) | |||
| 382 | int ret; | 761 | int ret; |
| 383 | 762 | ||
| 384 | if (sbio->err) { | 763 | if (sbio->err) { |
| 764 | ret = 0; | ||
| 385 | for (i = 0; i < sbio->count; ++i) | 765 | for (i = 0; i < sbio->count; ++i) |
| 386 | scrub_recheck_error(sbio, i); | 766 | ret |= scrub_recheck_error(sbio, i); |
| 767 | if (!ret) { | ||
| 768 | spin_lock(&sdev->stat_lock); | ||
| 769 | ++sdev->stat.unverified_errors; | ||
| 770 | spin_unlock(&sdev->stat_lock); | ||
| 771 | } | ||
| 387 | 772 | ||
| 388 | sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1); | 773 | sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1); |
| 389 | sbio->bio->bi_flags |= 1 << BIO_UPTODATE; | 774 | sbio->bio->bi_flags |= 1 << BIO_UPTODATE; |
| @@ -396,10 +781,6 @@ static void scrub_checksum(struct btrfs_work *work) | |||
| 396 | bi->bv_offset = 0; | 781 | bi->bv_offset = 0; |
| 397 | bi->bv_len = PAGE_SIZE; | 782 | bi->bv_len = PAGE_SIZE; |
| 398 | } | 783 | } |
| 399 | |||
| 400 | spin_lock(&sdev->stat_lock); | ||
| 401 | ++sdev->stat.read_errors; | ||
| 402 | spin_unlock(&sdev->stat_lock); | ||
| 403 | goto out; | 784 | goto out; |
| 404 | } | 785 | } |
| 405 | for (i = 0; i < sbio->count; ++i) { | 786 | for (i = 0; i < sbio->count; ++i) { |
| @@ -420,8 +801,14 @@ static void scrub_checksum(struct btrfs_work *work) | |||
| 420 | WARN_ON(1); | 801 | WARN_ON(1); |
| 421 | } | 802 | } |
| 422 | kunmap_atomic(buffer, KM_USER0); | 803 | kunmap_atomic(buffer, KM_USER0); |
| 423 | if (ret) | 804 | if (ret) { |
| 424 | scrub_recheck_error(sbio, i); | 805 | ret = scrub_recheck_error(sbio, i); |
| 806 | if (!ret) { | ||
| 807 | spin_lock(&sdev->stat_lock); | ||
| 808 | ++sdev->stat.unverified_errors; | ||
| 809 | spin_unlock(&sdev->stat_lock); | ||
| 810 | } | ||
| 811 | } | ||
| 425 | } | 812 | } |
| 426 | 813 | ||
| 427 | out: | 814 | out: |
| @@ -604,7 +991,7 @@ nomem: | |||
| 604 | } | 991 | } |
| 605 | 992 | ||
| 606 | static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len, | 993 | static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len, |
| 607 | u64 physical, u64 flags, u64 gen, u64 mirror_num, | 994 | u64 physical, u64 flags, u64 gen, int mirror_num, |
| 608 | u8 *csum, int force) | 995 | u8 *csum, int force) |
| 609 | { | 996 | { |
| 610 | struct scrub_bio *sbio; | 997 | struct scrub_bio *sbio; |
| @@ -701,7 +1088,7 @@ static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len, | |||
| 701 | 1088 | ||
| 702 | /* scrub extent tries to collect up to 64 kB for each bio */ | 1089 | /* scrub extent tries to collect up to 64 kB for each bio */ |
| 703 | static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len, | 1090 | static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len, |
| 704 | u64 physical, u64 flags, u64 gen, u64 mirror_num) | 1091 | u64 physical, u64 flags, u64 gen, int mirror_num) |
| 705 | { | 1092 | { |
| 706 | int ret; | 1093 | int ret; |
| 707 | u8 csum[BTRFS_CSUM_SIZE]; | 1094 | u8 csum[BTRFS_CSUM_SIZE]; |
| @@ -741,13 +1128,16 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, | |||
| 741 | int slot; | 1128 | int slot; |
| 742 | int i; | 1129 | int i; |
| 743 | u64 nstripes; | 1130 | u64 nstripes; |
| 744 | int start_stripe; | ||
| 745 | struct extent_buffer *l; | 1131 | struct extent_buffer *l; |
| 746 | struct btrfs_key key; | 1132 | struct btrfs_key key; |
| 747 | u64 physical; | 1133 | u64 physical; |
| 748 | u64 logical; | 1134 | u64 logical; |
| 749 | u64 generation; | 1135 | u64 generation; |
| 750 | u64 mirror_num; | 1136 | int mirror_num; |
| 1137 | struct reada_control *reada1; | ||
| 1138 | struct reada_control *reada2; | ||
| 1139 | struct btrfs_key key_start; | ||
| 1140 | struct btrfs_key key_end; | ||
| 751 | 1141 | ||
| 752 | u64 increment = map->stripe_len; | 1142 | u64 increment = map->stripe_len; |
| 753 | u64 offset; | 1143 | u64 offset; |
| @@ -758,102 +1148,88 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, | |||
| 758 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) { | 1148 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) { |
| 759 | offset = map->stripe_len * num; | 1149 | offset = map->stripe_len * num; |
| 760 | increment = map->stripe_len * map->num_stripes; | 1150 | increment = map->stripe_len * map->num_stripes; |
| 761 | mirror_num = 0; | 1151 | mirror_num = 1; |
| 762 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { | 1152 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { |
| 763 | int factor = map->num_stripes / map->sub_stripes; | 1153 | int factor = map->num_stripes / map->sub_stripes; |
| 764 | offset = map->stripe_len * (num / map->sub_stripes); | 1154 | offset = map->stripe_len * (num / map->sub_stripes); |
| 765 | increment = map->stripe_len * factor; | 1155 | increment = map->stripe_len * factor; |
| 766 | mirror_num = num % map->sub_stripes; | 1156 | mirror_num = num % map->sub_stripes + 1; |
| 767 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { | 1157 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { |
| 768 | increment = map->stripe_len; | 1158 | increment = map->stripe_len; |
| 769 | mirror_num = num % map->num_stripes; | 1159 | mirror_num = num % map->num_stripes + 1; |
| 770 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { | 1160 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { |
| 771 | increment = map->stripe_len; | 1161 | increment = map->stripe_len; |
| 772 | mirror_num = num % map->num_stripes; | 1162 | mirror_num = num % map->num_stripes + 1; |
| 773 | } else { | 1163 | } else { |
| 774 | increment = map->stripe_len; | 1164 | increment = map->stripe_len; |
| 775 | mirror_num = 0; | 1165 | mirror_num = 1; |
| 776 | } | 1166 | } |
| 777 | 1167 | ||
| 778 | path = btrfs_alloc_path(); | 1168 | path = btrfs_alloc_path(); |
| 779 | if (!path) | 1169 | if (!path) |
| 780 | return -ENOMEM; | 1170 | return -ENOMEM; |
| 781 | 1171 | ||
| 782 | path->reada = 2; | ||
| 783 | path->search_commit_root = 1; | 1172 | path->search_commit_root = 1; |
| 784 | path->skip_locking = 1; | 1173 | path->skip_locking = 1; |
| 785 | 1174 | ||
| 786 | /* | 1175 | /* |
| 787 | * find all extents for each stripe and just read them to get | 1176 | * trigger the readahead for the extent tree and the csum tree and |
| 788 | * them into the page cache | 1177 | * wait for completion. During readahead, the scrub is officially |
| 789 | * FIXME: we can do better. build a more intelligent prefetching | 1178 | * paused so as not to hold off transaction commits |
| 790 | */ | 1179 | */ |
| 791 | logical = base + offset; | 1180 | logical = base + offset; |
| 792 | physical = map->stripes[num].physical; | ||
| 793 | ret = 0; | ||
| 794 | for (i = 0; i < nstripes; ++i) { | ||
| 795 | key.objectid = logical; | ||
| 796 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
| 797 | key.offset = (u64)0; | ||
| 798 | |||
| 799 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 800 | if (ret < 0) | ||
| 801 | goto out_noplug; | ||
| 802 | |||
| 803 | /* | ||
| 804 | * we might miss half an extent here, but that doesn't matter, | ||
| 805 | * as it's only the prefetch | ||
| 806 | */ | ||
| 807 | while (1) { | ||
| 808 | l = path->nodes[0]; | ||
| 809 | slot = path->slots[0]; | ||
| 810 | if (slot >= btrfs_header_nritems(l)) { | ||
| 811 | ret = btrfs_next_leaf(root, path); | ||
| 812 | if (ret == 0) | ||
| 813 | continue; | ||
| 814 | if (ret < 0) | ||
| 815 | goto out_noplug; | ||
| 816 | 1181 | ||
| 817 | break; | 1182 | wait_event(sdev->list_wait, |
| 818 | } | 1183 | atomic_read(&sdev->in_flight) == 0); |
| 819 | btrfs_item_key_to_cpu(l, &key, slot); | 1184 | atomic_inc(&fs_info->scrubs_paused); |
| 1185 | wake_up(&fs_info->scrub_pause_wait); | ||
| 820 | 1186 | ||
| 821 | if (key.objectid >= logical + map->stripe_len) | 1187 | /* FIXME it might be better to start readahead at commit root */ |
| 822 | break; | 1188 | key_start.objectid = logical; |
| 1189 | key_start.type = BTRFS_EXTENT_ITEM_KEY; | ||
| 1190 | key_start.offset = (u64)0; | ||
| 1191 | key_end.objectid = base + offset + nstripes * increment; | ||
| 1192 | key_end.type = BTRFS_EXTENT_ITEM_KEY; | ||
| 1193 | key_end.offset = (u64)0; | ||
| 1194 | reada1 = btrfs_reada_add(root, &key_start, &key_end); | ||
| 1195 | |||
| 1196 | key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID; | ||
| 1197 | key_start.type = BTRFS_EXTENT_CSUM_KEY; | ||
| 1198 | key_start.offset = logical; | ||
| 1199 | key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID; | ||
| 1200 | key_end.type = BTRFS_EXTENT_CSUM_KEY; | ||
| 1201 | key_end.offset = base + offset + nstripes * increment; | ||
| 1202 | reada2 = btrfs_reada_add(csum_root, &key_start, &key_end); | ||
| 1203 | |||
| 1204 | if (!IS_ERR(reada1)) | ||
| 1205 | btrfs_reada_wait(reada1); | ||
| 1206 | if (!IS_ERR(reada2)) | ||
| 1207 | btrfs_reada_wait(reada2); | ||
| 823 | 1208 | ||
| 824 | path->slots[0]++; | 1209 | mutex_lock(&fs_info->scrub_lock); |
| 825 | } | 1210 | while (atomic_read(&fs_info->scrub_pause_req)) { |
| 826 | btrfs_release_path(path); | 1211 | mutex_unlock(&fs_info->scrub_lock); |
| 827 | logical += increment; | 1212 | wait_event(fs_info->scrub_pause_wait, |
| 828 | physical += map->stripe_len; | 1213 | atomic_read(&fs_info->scrub_pause_req) == 0); |
| 829 | cond_resched(); | 1214 | mutex_lock(&fs_info->scrub_lock); |
| 830 | } | 1215 | } |
| 1216 | atomic_dec(&fs_info->scrubs_paused); | ||
| 1217 | mutex_unlock(&fs_info->scrub_lock); | ||
| 1218 | wake_up(&fs_info->scrub_pause_wait); | ||
| 831 | 1219 | ||
| 832 | /* | 1220 | /* |
| 833 | * collect all data csums for the stripe to avoid seeking during | 1221 | * collect all data csums for the stripe to avoid seeking during |
| 834 | * the scrub. This might currently (crc32) end up being about 1MB | 1222 | * the scrub. This might currently (crc32) end up being about 1MB |
| 835 | */ | 1223 | */ |
| 836 | start_stripe = 0; | ||
| 837 | blk_start_plug(&plug); | 1224 | blk_start_plug(&plug); |
| 838 | again: | ||
| 839 | logical = base + offset + start_stripe * increment; | ||
| 840 | for (i = start_stripe; i < nstripes; ++i) { | ||
| 841 | ret = btrfs_lookup_csums_range(csum_root, logical, | ||
| 842 | logical + map->stripe_len - 1, | ||
| 843 | &sdev->csum_list, 1); | ||
| 844 | if (ret) | ||
| 845 | goto out; | ||
| 846 | 1225 | ||
| 847 | logical += increment; | ||
| 848 | cond_resched(); | ||
| 849 | } | ||
| 850 | /* | 1226 | /* |
| 851 | * now find all extents for each stripe and scrub them | 1227 | * now find all extents for each stripe and scrub them |
| 852 | */ | 1228 | */ |
| 853 | logical = base + offset + start_stripe * increment; | 1229 | logical = base + offset; |
| 854 | physical = map->stripes[num].physical + start_stripe * map->stripe_len; | 1230 | physical = map->stripes[num].physical; |
| 855 | ret = 0; | 1231 | ret = 0; |
| 856 | for (i = start_stripe; i < nstripes; ++i) { | 1232 | for (i = 0; i < nstripes; ++i) { |
| 857 | /* | 1233 | /* |
| 858 | * canceled? | 1234 | * canceled? |
| 859 | */ | 1235 | */ |
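The readahead runs with the scrub declared paused, so a transaction commit issued in the meantime is not held up waiting for it. Stripped of the reada calls, the pause bracket added above is:

    /* condensed from scrub_stripe() above */
    wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
    atomic_inc(&fs_info->scrubs_paused);
    wake_up(&fs_info->scrub_pause_wait);

    /* ... btrfs_reada_add()/btrfs_reada_wait() on both trees,
     * tolerating ERR_PTR results as "scrub cold" ... */

    mutex_lock(&fs_info->scrub_lock);
    while (atomic_read(&fs_info->scrub_pause_req)) {
            mutex_unlock(&fs_info->scrub_lock);
            wait_event(fs_info->scrub_pause_wait,
                       atomic_read(&fs_info->scrub_pause_req) == 0);
            mutex_lock(&fs_info->scrub_lock);
    }
    atomic_dec(&fs_info->scrubs_paused);
    mutex_unlock(&fs_info->scrub_lock);
    wake_up(&fs_info->scrub_pause_wait);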
| @@ -882,11 +1258,14 @@ again: | |||
| 882 | atomic_dec(&fs_info->scrubs_paused); | 1258 | atomic_dec(&fs_info->scrubs_paused); |
| 883 | mutex_unlock(&fs_info->scrub_lock); | 1259 | mutex_unlock(&fs_info->scrub_lock); |
| 884 | wake_up(&fs_info->scrub_pause_wait); | 1260 | wake_up(&fs_info->scrub_pause_wait); |
| 885 | scrub_free_csums(sdev); | ||
| 886 | start_stripe = i; | ||
| 887 | goto again; | ||
| 888 | } | 1261 | } |
| 889 | 1262 | ||
| 1263 | ret = btrfs_lookup_csums_range(csum_root, logical, | ||
| 1264 | logical + map->stripe_len - 1, | ||
| 1265 | &sdev->csum_list, 1); | ||
| 1266 | if (ret) | ||
| 1267 | goto out; | ||
| 1268 | |||
| 890 | key.objectid = logical; | 1269 | key.objectid = logical; |
| 891 | key.type = BTRFS_EXTENT_ITEM_KEY; | 1270 | key.type = BTRFS_EXTENT_ITEM_KEY; |
| 892 | key.offset = (u64)0; | 1271 | key.offset = (u64)0; |
| @@ -982,7 +1361,6 @@ next: | |||
| 982 | 1361 | ||
| 983 | out: | 1362 | out: |
| 984 | blk_finish_plug(&plug); | 1363 | blk_finish_plug(&plug); |
| 985 | out_noplug: | ||
| 986 | btrfs_free_path(path); | 1364 | btrfs_free_path(path); |
| 987 | return ret < 0 ? ret : 0; | 1365 | return ret < 0 ? ret : 0; |
| 988 | } | 1366 | } |
| @@ -1253,10 +1631,11 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, | |||
| 1253 | ret = scrub_enumerate_chunks(sdev, start, end); | 1631 | ret = scrub_enumerate_chunks(sdev, start, end); |
| 1254 | 1632 | ||
| 1255 | wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0); | 1633 | wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0); |
| 1256 | |||
| 1257 | atomic_dec(&fs_info->scrubs_running); | 1634 | atomic_dec(&fs_info->scrubs_running); |
| 1258 | wake_up(&fs_info->scrub_pause_wait); | 1635 | wake_up(&fs_info->scrub_pause_wait); |
| 1259 | 1636 | ||
| 1637 | wait_event(sdev->list_wait, atomic_read(&sdev->fixup_cnt) == 0); | ||
| 1638 | |||
| 1260 | if (progress) | 1639 | if (progress) |
| 1261 | memcpy(progress, &sdev->stat, sizeof(*progress)); | 1640 | memcpy(progress, &sdev->stat, sizeof(*progress)); |
| 1262 | 1641 | ||
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 15634d4648d7..57080dffdfc6 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
| @@ -40,6 +40,7 @@ | |||
| 40 | #include <linux/magic.h> | 40 | #include <linux/magic.h> |
| 41 | #include <linux/slab.h> | 41 | #include <linux/slab.h> |
| 42 | #include <linux/cleancache.h> | 42 | #include <linux/cleancache.h> |
| 43 | #include <linux/mnt_namespace.h> | ||
| 43 | #include "compat.h" | 44 | #include "compat.h" |
| 44 | #include "delayed-inode.h" | 45 | #include "delayed-inode.h" |
| 45 | #include "ctree.h" | 46 | #include "ctree.h" |
| @@ -58,6 +59,7 @@ | |||
| 58 | #include <trace/events/btrfs.h> | 59 | #include <trace/events/btrfs.h> |
| 59 | 60 | ||
| 60 | static const struct super_operations btrfs_super_ops; | 61 | static const struct super_operations btrfs_super_ops; |
| 62 | static struct file_system_type btrfs_fs_type; | ||
| 61 | 63 | ||
| 62 | static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, | 64 | static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, |
| 63 | char nbuf[16]) | 65 | char nbuf[16]) |
| @@ -162,7 +164,7 @@ enum { | |||
| 162 | Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, | 164 | Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, |
| 163 | Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, | 165 | Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, |
| 164 | Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, | 166 | Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, |
| 165 | Opt_inode_cache, Opt_err, | 167 | Opt_inode_cache, Opt_no_space_cache, Opt_recovery, Opt_err, |
| 166 | }; | 168 | }; |
| 167 | 169 | ||
| 168 | static match_table_t tokens = { | 170 | static match_table_t tokens = { |
| @@ -195,6 +197,8 @@ static match_table_t tokens = { | |||
| 195 | {Opt_subvolrootid, "subvolrootid=%d"}, | 197 | {Opt_subvolrootid, "subvolrootid=%d"}, |
| 196 | {Opt_defrag, "autodefrag"}, | 198 | {Opt_defrag, "autodefrag"}, |
| 197 | {Opt_inode_cache, "inode_cache"}, | 199 | {Opt_inode_cache, "inode_cache"}, |
| 200 | {Opt_no_space_cache, "no_space_cache"}, | ||
| 201 | {Opt_recovery, "recovery"}, | ||
| 198 | {Opt_err, NULL}, | 202 | {Opt_err, NULL}, |
| 199 | }; | 203 | }; |
| 200 | 204 | ||
| @@ -206,14 +210,19 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
| 206 | { | 210 | { |
| 207 | struct btrfs_fs_info *info = root->fs_info; | 211 | struct btrfs_fs_info *info = root->fs_info; |
| 208 | substring_t args[MAX_OPT_ARGS]; | 212 | substring_t args[MAX_OPT_ARGS]; |
| 209 | char *p, *num, *orig; | 213 | char *p, *num, *orig = NULL; |
| 214 | u64 cache_gen; | ||
| 210 | int intarg; | 215 | int intarg; |
| 211 | int ret = 0; | 216 | int ret = 0; |
| 212 | char *compress_type; | 217 | char *compress_type; |
| 213 | bool compress_force = false; | 218 | bool compress_force = false; |
| 214 | 219 | ||
| 220 | cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy); | ||
| 221 | if (cache_gen) | ||
| 222 | btrfs_set_opt(info->mount_opt, SPACE_CACHE); | ||
| 223 | |||
| 215 | if (!options) | 224 | if (!options) |
| 216 | return 0; | 225 | goto out; |
| 217 | 226 | ||
| 218 | /* | 227 | /* |
| 219 | * strsep changes the string, duplicate it because parse_options | 228 | * strsep changes the string, duplicate it because parse_options |
| @@ -360,9 +369,12 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
| 360 | btrfs_set_opt(info->mount_opt, DISCARD); | 369 | btrfs_set_opt(info->mount_opt, DISCARD); |
| 361 | break; | 370 | break; |
| 362 | case Opt_space_cache: | 371 | case Opt_space_cache: |
| 363 | printk(KERN_INFO "btrfs: enabling disk space caching\n"); | ||
| 364 | btrfs_set_opt(info->mount_opt, SPACE_CACHE); | 372 | btrfs_set_opt(info->mount_opt, SPACE_CACHE); |
| 365 | break; | 373 | break; |
| 374 | case Opt_no_space_cache: | ||
| 375 | printk(KERN_INFO "btrfs: disabling disk space caching\n"); | ||
| 376 | btrfs_clear_opt(info->mount_opt, SPACE_CACHE); | ||
| 377 | break; | ||
| 366 | case Opt_inode_cache: | 378 | case Opt_inode_cache: |
| 367 | printk(KERN_INFO "btrfs: enabling inode map caching\n"); | 379 | printk(KERN_INFO "btrfs: enabling inode map caching\n"); |
| 368 | btrfs_set_opt(info->mount_opt, INODE_MAP_CACHE); | 380 | btrfs_set_opt(info->mount_opt, INODE_MAP_CACHE); |
| @@ -381,6 +393,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
| 381 | printk(KERN_INFO "btrfs: enabling auto defrag"); | 393 | printk(KERN_INFO "btrfs: enabling auto defrag"); |
| 382 | btrfs_set_opt(info->mount_opt, AUTO_DEFRAG); | 394 | btrfs_set_opt(info->mount_opt, AUTO_DEFRAG); |
| 383 | break; | 395 | break; |
| 396 | case Opt_recovery: | ||
| 397 | printk(KERN_INFO "btrfs: enabling auto recovery"); | ||
| 398 | btrfs_set_opt(info->mount_opt, RECOVERY); | ||
| 399 | break; | ||
| 384 | case Opt_err: | 400 | case Opt_err: |
| 385 | printk(KERN_INFO "btrfs: unrecognized mount option " | 401 | printk(KERN_INFO "btrfs: unrecognized mount option " |
| 386 | "'%s'\n", p); | 402 | "'%s'\n", p); |
| @@ -391,6 +407,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
| 391 | } | 407 | } |
| 392 | } | 408 | } |
| 393 | out: | 409 | out: |
| 410 | if (!ret && btrfs_test_opt(root, SPACE_CACHE)) | ||
| 411 | printk(KERN_INFO "btrfs: disk space caching is enabled\n"); | ||
| 394 | kfree(orig); | 412 | kfree(orig); |
| 395 | return ret; | 413 | return ret; |
| 396 | } | 414 | } |
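The net effect of the cache_gen check at the top of btrfs_parse_options() is that space_cache becomes the default whenever the superblock records a cache generation; the new no_space_cache option is the explicit opt-out (for example, mount -o no_space_cache <dev> <dir>), and the single message at the out: label replaces the old per-option printk.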
| @@ -406,12 +424,12 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, | |||
| 406 | u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices) | 424 | u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices) |
| 407 | { | 425 | { |
| 408 | substring_t args[MAX_OPT_ARGS]; | 426 | substring_t args[MAX_OPT_ARGS]; |
| 409 | char *opts, *orig, *p; | 427 | char *device_name, *opts, *orig, *p; |
| 410 | int error = 0; | 428 | int error = 0; |
| 411 | int intarg; | 429 | int intarg; |
| 412 | 430 | ||
| 413 | if (!options) | 431 | if (!options) |
| 414 | goto out; | 432 | return 0; |
| 415 | 433 | ||
| 416 | /* | 434 | /* |
| 417 | * strsep changes the string, duplicate it because parse_options | 435 | * strsep changes the string, duplicate it because parse_options |
| @@ -457,29 +475,24 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, | |||
| 457 | } | 475 | } |
| 458 | break; | 476 | break; |
| 459 | case Opt_device: | 477 | case Opt_device: |
| 460 | error = btrfs_scan_one_device(match_strdup(&args[0]), | 478 | device_name = match_strdup(&args[0]); |
| 479 | if (!device_name) { | ||
| 480 | error = -ENOMEM; | ||
| 481 | goto out; | ||
| 482 | } | ||
| 483 | error = btrfs_scan_one_device(device_name, | ||
| 461 | flags, holder, fs_devices); | 484 | flags, holder, fs_devices); |
| 485 | kfree(device_name); | ||
| 462 | if (error) | 486 | if (error) |
| 463 | goto out_free_opts; | 487 | goto out; |
| 464 | break; | 488 | break; |
| 465 | default: | 489 | default: |
| 466 | break; | 490 | break; |
| 467 | } | 491 | } |
| 468 | } | 492 | } |
| 469 | 493 | ||
| 470 | out_free_opts: | 494 | out: |
| 471 | kfree(orig); | 495 | kfree(orig); |
| 472 | out: | ||
| 473 | /* | ||
| 474 | * If no subvolume name is specified we use the default one. Allocate | ||
| 475 | * a copy of the string "." here so that code later in the | ||
| 476 | * mount path doesn't care if it's the default volume or another one. | ||
| 477 | */ | ||
| 478 | if (!*subvol_name) { | ||
| 479 | *subvol_name = kstrdup(".", GFP_KERNEL); | ||
| 480 | if (!*subvol_name) | ||
| 481 | return -ENOMEM; | ||
| 482 | } | ||
| 483 | return error; | 496 | return error; |
| 484 | } | 497 | } |
| 485 | 498 | ||
| @@ -492,7 +505,6 @@ static struct dentry *get_default_root(struct super_block *sb, | |||
| 492 | struct btrfs_path *path; | 505 | struct btrfs_path *path; |
| 493 | struct btrfs_key location; | 506 | struct btrfs_key location; |
| 494 | struct inode *inode; | 507 | struct inode *inode; |
| 495 | struct dentry *dentry; | ||
| 496 | u64 dir_id; | 508 | u64 dir_id; |
| 497 | int new = 0; | 509 | int new = 0; |
| 498 | 510 | ||
| @@ -517,7 +529,7 @@ static struct dentry *get_default_root(struct super_block *sb, | |||
| 517 | * will mount by default if we haven't been given a specific subvolume | 529 | * will mount by default if we haven't been given a specific subvolume |
| 518 | * to mount. | 530 | * to mount. |
| 519 | */ | 531 | */ |
| 520 | dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); | 532 | dir_id = btrfs_super_root_dir(root->fs_info->super_copy); |
| 521 | di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0); | 533 | di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0); |
| 522 | if (IS_ERR(di)) { | 534 | if (IS_ERR(di)) { |
| 523 | btrfs_free_path(path); | 535 | btrfs_free_path(path); |
| @@ -566,29 +578,7 @@ setup_root: | |||
| 566 | return dget(sb->s_root); | 578 | return dget(sb->s_root); |
| 567 | } | 579 | } |
| 568 | 580 | ||
| 569 | if (new) { | 581 | return d_obtain_alias(inode); |
| 570 | const struct qstr name = { .name = "/", .len = 1 }; | ||
| 571 | |||
| 572 | /* | ||
| 573 | * New inode, we need to make the dentry a sibling of s_root so | ||
| 574 | * everything gets cleaned up properly on unmount. | ||
| 575 | */ | ||
| 576 | dentry = d_alloc(sb->s_root, &name); | ||
| 577 | if (!dentry) { | ||
| 578 | iput(inode); | ||
| 579 | return ERR_PTR(-ENOMEM); | ||
| 580 | } | ||
| 581 | d_splice_alias(inode, dentry); | ||
| 582 | } else { | ||
| 583 | /* | ||
| 584 | * We found the inode in cache, just find a dentry for it and | ||
| 585 | * put the reference to the inode we just got. | ||
| 586 | */ | ||
| 587 | dentry = d_find_alias(inode); | ||
| 588 | iput(inode); | ||
| 589 | } | ||
| 590 | |||
| 591 | return dentry; | ||
| 592 | } | 582 | } |
| 593 | 583 | ||
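The dentry juggling removed here is what d_obtain_alias() already does internally: for a cached inode it reuses an existing alias (the old d_find_alias()/iput() branch), for a new one it allocates a disconnected dentry (the old d_alloc()/d_splice_alias() branch), and it returns an ERR_PTR on failure, so the local struct dentry is no longer needed.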
| 594 | static int btrfs_fill_super(struct super_block *sb, | 584 | static int btrfs_fill_super(struct super_block *sb, |
| @@ -719,6 +709,8 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
| 719 | seq_puts(seq, ",noacl"); | 709 | seq_puts(seq, ",noacl"); |
| 720 | if (btrfs_test_opt(root, SPACE_CACHE)) | 710 | if (btrfs_test_opt(root, SPACE_CACHE)) |
| 721 | seq_puts(seq, ",space_cache"); | 711 | seq_puts(seq, ",space_cache"); |
| 712 | else | ||
| 713 | seq_puts(seq, ",no_space_cache"); | ||
| 722 | if (btrfs_test_opt(root, CLEAR_CACHE)) | 714 | if (btrfs_test_opt(root, CLEAR_CACHE)) |
| 723 | seq_puts(seq, ",clear_cache"); | 715 | seq_puts(seq, ",clear_cache"); |
| 724 | if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) | 716 | if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) |
| @@ -753,6 +745,137 @@ static int btrfs_set_super(struct super_block *s, void *data) | |||
| 753 | return set_anon_super(s, data); | 745 | return set_anon_super(s, data); |
| 754 | } | 746 | } |
| 755 | 747 | ||
| 748 | /* | ||
| 749 | * subvolumes are identified by ino 256 | ||
| 750 | */ | ||
| 751 | static inline int is_subvolume_inode(struct inode *inode) | ||
| 752 | { | ||
| 753 | if (inode && inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | ||
| 754 | return 1; | ||
| 755 | return 0; | ||
| 756 | } | ||
| 757 | |||
| 758 | /* | ||
| 759 | * This will strip out the subvol=%s argument from an argument string and add | ||
| 760 | * subvolid=0 to make sure we get the actual tree root for path walking to the | ||
| 761 | * subvol we want. | ||
| 762 | */ | ||
| 763 | static char *setup_root_args(char *args) | ||
| 764 | { | ||
| 765 | unsigned copied = 0; | ||
| 766 | unsigned len = strlen(args) + 2; | ||
| 767 | char *pos; | ||
| 768 | char *ret; | ||
| 769 | |||
| 770 | /* | ||
| 771 | * We need the same args as before, but minus | ||
| 772 | * | ||
| 773 | * subvol=a | ||
| 774 | * | ||
| 775 | * and add | ||
| 776 | * | ||
| 777 | * subvolid=0 | ||
| 778 | * | ||
| 779 | * which at worst (a one-character subvol name) is 2 characters more, so | ||
| 780 | * allocating strlen(args) + 2 characters always suffices. | ||
| 781 | */ | ||
| 782 | ret = kzalloc(len * sizeof(char), GFP_NOFS); | ||
| 783 | if (!ret) | ||
| 784 | return NULL; | ||
| 785 | pos = strstr(args, "subvol="); | ||
| 786 | |||
| 787 | /* This shouldn't happen, but just in case... */ | ||
| 788 | if (!pos) { | ||
| 789 | kfree(ret); | ||
| 790 | return NULL; | ||
| 791 | } | ||
| 792 | |||
| 793 | /* | ||
| 794 | * The subvol=<> arg is not at the front of the string, so copy everything | ||
| 795 | * up to that into ret. | ||
| 796 | */ | ||
| 797 | if (pos != args) { | ||
| 798 | *pos = '\0'; | ||
| 799 | strcpy(ret, args); | ||
| 800 | copied += strlen(args); | ||
| 801 | pos++; | ||
| 802 | } | ||
| 803 | |||
| 804 | strncpy(ret + copied, "subvolid=0", len - copied); | ||
| 805 | |||
| 806 | /* Length of subvolid=0 */ | ||
| 807 | copied += 10; | ||
| 808 | |||
| 809 | /* | ||
| 810 | * If there is no ',' after the subvol= option then we know there are no | ||
| 811 | * other options and we can just return. | ||
| 812 | */ | ||
| 813 | pos = strchr(pos, ','); | ||
| 814 | if (!pos) | ||
| 815 | return ret; | ||
| 816 | |||
| 817 | /* Copy the rest of the arguments into our buffer */ | ||
| 818 | strncpy(ret + copied, pos, len - copied); | ||
| 819 | copied += strlen(pos); | ||
| 820 | |||
| 821 | return ret; | ||
| 822 | } | ||
| 823 | |||
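A worked example makes the string surgery easier to follow; the inputs below are illustrative, not values taken from this patch:

    /* hypothetical inputs -> outputs of setup_root_args():
     *
     *   "subvol=snap,compress=lzo"  ->  "subvolid=0,compress=lzo"
     *   "noatime,subvol=a"          ->  "noatime,subvolid=0"
     *   "subvol=a"                  ->  "subvolid=0"
     *
     * subvol=<name> is dropped, subvolid=0 is spliced in at the same
     * position, and everything else is copied through unchanged. */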
| 824 | static struct dentry *mount_subvol(const char *subvol_name, int flags, | ||
| 825 | const char *device_name, char *data) | ||
| 826 | { | ||
| 827 | struct super_block *s; | ||
| 828 | struct dentry *root; | ||
| 829 | struct vfsmount *mnt; | ||
| 830 | struct mnt_namespace *ns_private; | ||
| 831 | char *newargs; | ||
| 832 | struct path path; | ||
| 833 | int error; | ||
| 834 | |||
| 835 | newargs = setup_root_args(data); | ||
| 836 | if (!newargs) | ||
| 837 | return ERR_PTR(-ENOMEM); | ||
| 838 | mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, | ||
| 839 | newargs); | ||
| 840 | kfree(newargs); | ||
| 841 | if (IS_ERR(mnt)) | ||
| 842 | return ERR_CAST(mnt); | ||
| 843 | |||
| 844 | ns_private = create_mnt_ns(mnt); | ||
| 845 | if (IS_ERR(ns_private)) { | ||
| 846 | mntput(mnt); | ||
| 847 | return ERR_CAST(ns_private); | ||
| 848 | } | ||
| 849 | |||
| 850 | /* | ||
| 851 | * This will trigger the automount of the subvol so we can just | ||
| 852 | * drop the mnt we have here and return the dentry that we | ||
| 853 | * found. | ||
| 854 | */ | ||
| 855 | error = vfs_path_lookup(mnt->mnt_root, mnt, subvol_name, | ||
| 856 | LOOKUP_FOLLOW, &path); | ||
| 857 | put_mnt_ns(ns_private); | ||
| 858 | if (error) | ||
| 859 | return ERR_PTR(error); | ||
| 860 | |||
| 861 | if (!is_subvolume_inode(path.dentry->d_inode)) { | ||
| 862 | path_put(&path); | ||
| 863 | mntput(mnt); | ||
| 864 | error = -EINVAL; | ||
| 865 | printk(KERN_ERR "btrfs: '%s' is not a valid subvolume\n", | ||
| 866 | subvol_name); | ||
| 867 | return ERR_PTR(-EINVAL); | ||
| 868 | } | ||
| 869 | |||
| 870 | /* Get a ref to the sb and the dentry we found and return it */ | ||
| 871 | s = path.mnt->mnt_sb; | ||
| 872 | atomic_inc(&s->s_active); | ||
| 873 | root = dget(path.dentry); | ||
| 874 | path_put(&path); | ||
| 875 | down_write(&s->s_umount); | ||
| 876 | |||
| 877 | return root; | ||
| 878 | } | ||
| 756 | 879 | ||
| 757 | /* | 880 | /* |
| 758 | * Find a superblock for the given device / mount point. | 881 | * Find a superblock for the given device / mount point. |
| @@ -784,13 +907,19 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
| 784 | if (error) | 907 | if (error) |
| 785 | return ERR_PTR(error); | 908 | return ERR_PTR(error); |
| 786 | 909 | ||
| 910 | if (subvol_name) { | ||
| 911 | root = mount_subvol(subvol_name, flags, device_name, data); | ||
| 912 | kfree(subvol_name); | ||
| 913 | return root; | ||
| 914 | } | ||
| 915 | |||
| 787 | error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices); | 916 | error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices); |
| 788 | if (error) | 917 | if (error) |
| 789 | goto error_free_subvol_name; | 918 | return ERR_PTR(error); |
| 790 | 919 | ||
| 791 | error = btrfs_open_devices(fs_devices, mode, fs_type); | 920 | error = btrfs_open_devices(fs_devices, mode, fs_type); |
| 792 | if (error) | 921 | if (error) |
| 793 | goto error_free_subvol_name; | 922 | return ERR_PTR(error); |
| 794 | 923 | ||
| 795 | if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) { | 924 | if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) { |
| 796 | error = -EACCES; | 925 | error = -EACCES; |
| @@ -813,88 +942,57 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
| 813 | fs_info->fs_devices = fs_devices; | 942 | fs_info->fs_devices = fs_devices; |
| 814 | tree_root->fs_info = fs_info; | 943 | tree_root->fs_info = fs_info; |
| 815 | 944 | ||
| 945 | fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); | ||
| 946 | fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); | ||
| 947 | if (!fs_info->super_copy || !fs_info->super_for_commit) { | ||
| 948 | error = -ENOMEM; | ||
| 949 | goto error_close_devices; | ||
| 950 | } | ||
| 951 | |||
| 816 | bdev = fs_devices->latest_bdev; | 952 | bdev = fs_devices->latest_bdev; |
| 817 | s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root); | 953 | s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root); |
| 818 | if (IS_ERR(s)) | 954 | if (IS_ERR(s)) { |
| 819 | goto error_s; | 955 | error = PTR_ERR(s); |
| 956 | goto error_close_devices; | ||
| 957 | } | ||
| 820 | 958 | ||
| 821 | if (s->s_root) { | 959 | if (s->s_root) { |
| 822 | if ((flags ^ s->s_flags) & MS_RDONLY) { | 960 | if ((flags ^ s->s_flags) & MS_RDONLY) { |
| 823 | deactivate_locked_super(s); | 961 | deactivate_locked_super(s); |
| 824 | error = -EBUSY; | 962 | return ERR_PTR(-EBUSY); |
| 825 | goto error_close_devices; | ||
| 826 | } | 963 | } |
| 827 | 964 | ||
| 828 | btrfs_close_devices(fs_devices); | 965 | btrfs_close_devices(fs_devices); |
| 829 | kfree(fs_info); | 966 | free_fs_info(fs_info); |
| 830 | kfree(tree_root); | 967 | kfree(tree_root); |
| 831 | } else { | 968 | } else { |
| 832 | char b[BDEVNAME_SIZE]; | 969 | char b[BDEVNAME_SIZE]; |
| 833 | 970 | ||
| 834 | s->s_flags = flags | MS_NOSEC; | 971 | s->s_flags = flags | MS_NOSEC; |
| 835 | strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); | 972 | strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); |
| 973 | btrfs_sb(s)->fs_info->bdev_holder = fs_type; | ||
| 836 | error = btrfs_fill_super(s, fs_devices, data, | 974 | error = btrfs_fill_super(s, fs_devices, data, |
| 837 | flags & MS_SILENT ? 1 : 0); | 975 | flags & MS_SILENT ? 1 : 0); |
| 838 | if (error) { | 976 | if (error) { |
| 839 | deactivate_locked_super(s); | 977 | deactivate_locked_super(s); |
| 840 | goto error_free_subvol_name; | 978 | return ERR_PTR(error); |
| 841 | } | 979 | } |
| 842 | 980 | ||
| 843 | btrfs_sb(s)->fs_info->bdev_holder = fs_type; | ||
| 844 | s->s_flags |= MS_ACTIVE; | 981 | s->s_flags |= MS_ACTIVE; |
| 845 | } | 982 | } |
| 846 | 983 | ||
| 847 | /* if they gave us a subvolume name bind mount into that */ | 984 | root = get_default_root(s, subvol_objectid); |
| 848 | if (strcmp(subvol_name, ".")) { | 985 | if (IS_ERR(root)) { |
| 849 | struct dentry *new_root; | 986 | deactivate_locked_super(s); |
| 850 | 987 | return root; | |
| 851 | root = get_default_root(s, subvol_rootid); | ||
| 852 | if (IS_ERR(root)) { | ||
| 853 | error = PTR_ERR(root); | ||
| 854 | deactivate_locked_super(s); | ||
| 855 | goto error_free_subvol_name; | ||
| 856 | } | ||
| 857 | |||
| 858 | mutex_lock(&root->d_inode->i_mutex); | ||
| 859 | new_root = lookup_one_len(subvol_name, root, | ||
| 860 | strlen(subvol_name)); | ||
| 861 | mutex_unlock(&root->d_inode->i_mutex); | ||
| 862 | |||
| 863 | if (IS_ERR(new_root)) { | ||
| 864 | dput(root); | ||
| 865 | deactivate_locked_super(s); | ||
| 866 | error = PTR_ERR(new_root); | ||
| 867 | goto error_free_subvol_name; | ||
| 868 | } | ||
| 869 | if (!new_root->d_inode) { | ||
| 870 | dput(root); | ||
| 871 | dput(new_root); | ||
| 872 | deactivate_locked_super(s); | ||
| 873 | error = -ENXIO; | ||
| 874 | goto error_free_subvol_name; | ||
| 875 | } | ||
| 876 | dput(root); | ||
| 877 | root = new_root; | ||
| 878 | } else { | ||
| 879 | root = get_default_root(s, subvol_objectid); | ||
| 880 | if (IS_ERR(root)) { | ||
| 881 | error = PTR_ERR(root); | ||
| 882 | deactivate_locked_super(s); | ||
| 883 | goto error_free_subvol_name; | ||
| 884 | } | ||
| 885 | } | 988 | } |
| 886 | 989 | ||
| 887 | kfree(subvol_name); | ||
| 888 | return root; | 990 | return root; |
| 889 | 991 | ||
| 890 | error_s: | ||
| 891 | error = PTR_ERR(s); | ||
| 892 | error_close_devices: | 992 | error_close_devices: |
| 893 | btrfs_close_devices(fs_devices); | 993 | btrfs_close_devices(fs_devices); |
| 894 | kfree(fs_info); | 994 | free_fs_info(fs_info); |
| 895 | kfree(tree_root); | 995 | kfree(tree_root); |
| 896 | error_free_subvol_name: | ||
| 897 | kfree(subvol_name); | ||
| 898 | return ERR_PTR(error); | 996 | return ERR_PTR(error); |
| 899 | } | 997 | } |
| 900 | 998 | ||
| @@ -919,7 +1017,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
| 919 | if (root->fs_info->fs_devices->rw_devices == 0) | 1017 | if (root->fs_info->fs_devices->rw_devices == 0) |
| 920 | return -EACCES; | 1018 | return -EACCES; |
| 921 | 1019 | ||
| 922 | if (btrfs_super_log_root(&root->fs_info->super_copy) != 0) | 1020 | if (btrfs_super_log_root(root->fs_info->super_copy) != 0) |
| 923 | return -EINVAL; | 1021 | return -EINVAL; |
| 924 | 1022 | ||
| 925 | ret = btrfs_cleanup_fs_roots(root->fs_info); | 1023 | ret = btrfs_cleanup_fs_roots(root->fs_info); |
| @@ -1085,7 +1183,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) | |||
| 1085 | static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 1183 | static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) |
| 1086 | { | 1184 | { |
| 1087 | struct btrfs_root *root = btrfs_sb(dentry->d_sb); | 1185 | struct btrfs_root *root = btrfs_sb(dentry->d_sb); |
| 1088 | struct btrfs_super_block *disk_super = &root->fs_info->super_copy; | 1186 | struct btrfs_super_block *disk_super = root->fs_info->super_copy; |
| 1089 | struct list_head *head = &root->fs_info->space_info; | 1187 | struct list_head *head = &root->fs_info->space_info; |
| 1090 | struct btrfs_space_info *found; | 1188 | struct btrfs_space_info *found; |
| 1091 | u64 total_used = 0; | 1189 | u64 total_used = 0; |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index e24b7964a155..960835eaf4da 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
| @@ -55,6 +55,7 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail) | |||
| 55 | struct btrfs_transaction *cur_trans; | 55 | struct btrfs_transaction *cur_trans; |
| 56 | 56 | ||
| 57 | spin_lock(&root->fs_info->trans_lock); | 57 | spin_lock(&root->fs_info->trans_lock); |
| 58 | loop: | ||
| 58 | if (root->fs_info->trans_no_join) { | 59 | if (root->fs_info->trans_no_join) { |
| 59 | if (!nofail) { | 60 | if (!nofail) { |
| 60 | spin_unlock(&root->fs_info->trans_lock); | 61 | spin_unlock(&root->fs_info->trans_lock); |
| @@ -75,16 +76,18 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail) | |||
| 75 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); | 76 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); |
| 76 | if (!cur_trans) | 77 | if (!cur_trans) |
| 77 | return -ENOMEM; | 78 | return -ENOMEM; |
| 79 | |||
| 78 | spin_lock(&root->fs_info->trans_lock); | 80 | spin_lock(&root->fs_info->trans_lock); |
| 79 | if (root->fs_info->running_transaction) { | 81 | if (root->fs_info->running_transaction) { |
| 82 | /* | ||
| 83 | * someone started a transaction after we unlocked. Make sure | ||
| 84 | * to redo the trans_no_join checks above | ||
| 85 | */ | ||
| 80 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); | 86 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); |
| 81 | cur_trans = root->fs_info->running_transaction; | 87 | cur_trans = root->fs_info->running_transaction; |
| 82 | atomic_inc(&cur_trans->use_count); | 88 | goto loop; |
| 83 | atomic_inc(&cur_trans->num_writers); | ||
| 84 | cur_trans->num_joined++; | ||
| 85 | spin_unlock(&root->fs_info->trans_lock); | ||
| 86 | return 0; | ||
| 87 | } | 89 | } |
| 90 | |||
| 88 | atomic_set(&cur_trans->num_writers, 1); | 91 | atomic_set(&cur_trans->num_writers, 1); |
| 89 | cur_trans->num_joined = 0; | 92 | cur_trans->num_joined = 0; |
| 90 | init_waitqueue_head(&cur_trans->writer_wait); | 93 | init_waitqueue_head(&cur_trans->writer_wait); |
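join_transaction() now follows the usual allocate-outside-the-lock shape: drop trans_lock, allocate with GFP_NOFS (which may sleep), retake the lock, and if another writer installed a running transaction in the window, free the allocation and goto loop so that every check, including trans_no_join, is redone under the lock. The shape in the abstract, with illustrative names:

    spin_lock(&lock);
    loop:
            if (no_join)                  /* may unlock, wait and retry */
                    ...
            if (running) {                /* join the existing transaction */
                    atomic_inc(&running->use_count);
                    spin_unlock(&lock);
                    return 0;
            }
            spin_unlock(&lock);
            new = kmem_cache_alloc(cachep, GFP_NOFS);  /* may sleep */
            spin_lock(&lock);
            if (running) {                /* lost the race */
                    kmem_cache_free(cachep, new);
                    goto loop;            /* redo all checks under the lock */
            }
            /* install new as the running transaction */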
| @@ -275,7 +278,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
| 275 | */ | 278 | */ |
| 276 | if (num_items > 0 && root != root->fs_info->chunk_root) { | 279 | if (num_items > 0 && root != root->fs_info->chunk_root) { |
| 277 | num_bytes = btrfs_calc_trans_metadata_size(root, num_items); | 280 | num_bytes = btrfs_calc_trans_metadata_size(root, num_items); |
| 278 | ret = btrfs_block_rsv_add(NULL, root, | 281 | ret = btrfs_block_rsv_add(root, |
| 279 | &root->fs_info->trans_block_rsv, | 282 | &root->fs_info->trans_block_rsv, |
| 280 | num_bytes); | 283 | num_bytes); |
| 281 | if (ret) | 284 | if (ret) |
| @@ -418,8 +421,8 @@ static int should_end_transaction(struct btrfs_trans_handle *trans, | |||
| 418 | struct btrfs_root *root) | 421 | struct btrfs_root *root) |
| 419 | { | 422 | { |
| 420 | int ret; | 423 | int ret; |
| 421 | ret = btrfs_block_rsv_check(trans, root, | 424 | |
| 422 | &root->fs_info->global_block_rsv, 0, 5); | 425 | ret = btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5); |
| 423 | return ret ? 1 : 0; | 426 | return ret ? 1 : 0; |
| 424 | } | 427 | } |
| 425 | 428 | ||
| @@ -427,17 +430,26 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | |||
| 427 | struct btrfs_root *root) | 430 | struct btrfs_root *root) |
| 428 | { | 431 | { |
| 429 | struct btrfs_transaction *cur_trans = trans->transaction; | 432 | struct btrfs_transaction *cur_trans = trans->transaction; |
| 433 | struct btrfs_block_rsv *rsv = trans->block_rsv; | ||
| 430 | int updates; | 434 | int updates; |
| 431 | 435 | ||
| 432 | smp_mb(); | 436 | smp_mb(); |
| 433 | if (cur_trans->blocked || cur_trans->delayed_refs.flushing) | 437 | if (cur_trans->blocked || cur_trans->delayed_refs.flushing) |
| 434 | return 1; | 438 | return 1; |
| 435 | 439 | ||
| 440 | /* | ||
| 441 | * We need to do this in case we're deleting csums so the global block | ||
| 442 | * rsv gets used instead of the csum block rsv. | ||
| 443 | */ | ||
| 444 | trans->block_rsv = NULL; | ||
| 445 | |||
| 436 | updates = trans->delayed_ref_updates; | 446 | updates = trans->delayed_ref_updates; |
| 437 | trans->delayed_ref_updates = 0; | 447 | trans->delayed_ref_updates = 0; |
| 438 | if (updates) | 448 | if (updates) |
| 439 | btrfs_run_delayed_refs(trans, root, updates); | 449 | btrfs_run_delayed_refs(trans, root, updates); |
| 440 | 450 | ||
| 451 | trans->block_rsv = rsv; | ||
| 452 | |||
| 441 | return should_end_transaction(trans, root); | 453 | return should_end_transaction(trans, root); |
| 442 | } | 454 | } |
| 443 | 455 | ||
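The park-and-restore of trans->block_rsv is the point of this hunk: while the delayed refs run (and may delete csums), the handle must not charge its own reservation. In isolation:

    /* condensed from btrfs_should_end_transaction() above */
    struct btrfs_block_rsv *rsv = trans->block_rsv;

    trans->block_rsv = NULL;    /* csum deletion falls back to the
                                 * global block rsv, not the csum rsv */
    if (updates)
            btrfs_run_delayed_refs(trans, root, updates);
    trans->block_rsv = rsv;     /* restore for the caller */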
| @@ -453,6 +465,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 453 | return 0; | 465 | return 0; |
| 454 | } | 466 | } |
| 455 | 467 | ||
| 468 | btrfs_trans_release_metadata(trans, root); | ||
| 469 | trans->block_rsv = NULL; | ||
| 456 | while (count < 4) { | 470 | while (count < 4) { |
| 457 | unsigned long cur = trans->delayed_ref_updates; | 471 | unsigned long cur = trans->delayed_ref_updates; |
| 458 | trans->delayed_ref_updates = 0; | 472 | trans->delayed_ref_updates = 0; |
| @@ -473,8 +487,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 473 | count++; | 487 | count++; |
| 474 | } | 488 | } |
| 475 | 489 | ||
| 476 | btrfs_trans_release_metadata(trans, root); | ||
| 477 | |||
| 478 | if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && | 490 | if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && |
| 479 | should_end_transaction(trans, root)) { | 491 | should_end_transaction(trans, root)) { |
| 480 | trans->transaction->blocked = 1; | 492 | trans->transaction->blocked = 1; |
| @@ -562,50 +574,21 @@ int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans, | |||
| 562 | int btrfs_write_marked_extents(struct btrfs_root *root, | 574 | int btrfs_write_marked_extents(struct btrfs_root *root, |
| 563 | struct extent_io_tree *dirty_pages, int mark) | 575 | struct extent_io_tree *dirty_pages, int mark) |
| 564 | { | 576 | { |
| 565 | int ret; | ||
| 566 | int err = 0; | 577 | int err = 0; |
| 567 | int werr = 0; | 578 | int werr = 0; |
| 568 | struct page *page; | 579 | struct address_space *mapping = root->fs_info->btree_inode->i_mapping; |
| 569 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
| 570 | u64 start = 0; | 580 | u64 start = 0; |
| 571 | u64 end; | 581 | u64 end; |
| 572 | unsigned long index; | ||
| 573 | |||
| 574 | while (1) { | ||
| 575 | ret = find_first_extent_bit(dirty_pages, start, &start, &end, | ||
| 576 | mark); | ||
| 577 | if (ret) | ||
| 578 | break; | ||
| 579 | while (start <= end) { | ||
| 580 | cond_resched(); | ||
| 581 | |||
| 582 | index = start >> PAGE_CACHE_SHIFT; | ||
| 583 | start = (u64)(index + 1) << PAGE_CACHE_SHIFT; | ||
| 584 | page = find_get_page(btree_inode->i_mapping, index); | ||
| 585 | if (!page) | ||
| 586 | continue; | ||
| 587 | |||
| 588 | btree_lock_page_hook(page); | ||
| 589 | if (!page->mapping) { | ||
| 590 | unlock_page(page); | ||
| 591 | page_cache_release(page); | ||
| 592 | continue; | ||
| 593 | } | ||
| 594 | 582 | ||
| 595 | if (PageWriteback(page)) { | 583 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, |
| 596 | if (PageDirty(page)) | 584 | mark)) { |
| 597 | wait_on_page_writeback(page); | 585 | convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, mark, |
| 598 | else { | 586 | GFP_NOFS); |
| 599 | unlock_page(page); | 587 | err = filemap_fdatawrite_range(mapping, start, end); |
| 600 | page_cache_release(page); | 588 | if (err) |
| 601 | continue; | 589 | werr = err; |
| 602 | } | 590 | cond_resched(); |
| 603 | } | 591 | start = end + 1; |
| 604 | err = write_one_page(page, 0); | ||
| 605 | if (err) | ||
| 606 | werr = err; | ||
| 607 | page_cache_release(page); | ||
| 608 | } | ||
| 609 | } | 592 | } |
| 610 | if (err) | 593 | if (err) |
| 611 | werr = err; | 594 | werr = err; |
| @@ -621,39 +604,20 @@ int btrfs_write_marked_extents(struct btrfs_root *root, | |||
| 621 | int btrfs_wait_marked_extents(struct btrfs_root *root, | 604 | int btrfs_wait_marked_extents(struct btrfs_root *root, |
| 622 | struct extent_io_tree *dirty_pages, int mark) | 605 | struct extent_io_tree *dirty_pages, int mark) |
| 623 | { | 606 | { |
| 624 | int ret; | ||
| 625 | int err = 0; | 607 | int err = 0; |
| 626 | int werr = 0; | 608 | int werr = 0; |
| 627 | struct page *page; | 609 | struct address_space *mapping = root->fs_info->btree_inode->i_mapping; |
| 628 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
| 629 | u64 start = 0; | 610 | u64 start = 0; |
| 630 | u64 end; | 611 | u64 end; |
| 631 | unsigned long index; | ||
| 632 | |||
| 633 | while (1) { | ||
| 634 | ret = find_first_extent_bit(dirty_pages, start, &start, &end, | ||
| 635 | mark); | ||
| 636 | if (ret) | ||
| 637 | break; | ||
| 638 | 612 | ||
| 639 | clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); | 613 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, |
| 640 | while (start <= end) { | 614 | EXTENT_NEED_WAIT)) { |
| 641 | index = start >> PAGE_CACHE_SHIFT; | 615 | clear_extent_bits(dirty_pages, start, end, EXTENT_NEED_WAIT, GFP_NOFS); |
| 642 | start = (u64)(index + 1) << PAGE_CACHE_SHIFT; | 616 | err = filemap_fdatawait_range(mapping, start, end); |
| 643 | page = find_get_page(btree_inode->i_mapping, index); | 617 | if (err) |
| 644 | if (!page) | 618 | werr = err; |
| 645 | continue; | 619 | cond_resched(); |
| 646 | if (PageDirty(page)) { | 620 | start = end + 1; |
| 647 | btree_lock_page_hook(page); | ||
| 648 | wait_on_page_writeback(page); | ||
| 649 | err = write_one_page(page, 0); | ||
| 650 | if (err) | ||
| 651 | werr = err; | ||
| 652 | } | ||
| 653 | wait_on_page_writeback(page); | ||
| 654 | page_cache_release(page); | ||
| 655 | cond_resched(); | ||
| 656 | } | ||
| 657 | } | 621 | } |
| 658 | if (err) | 622 | if (err) |
| 659 | werr = err; | 623 | werr = err; |
| @@ -673,7 +637,12 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, | |||
| 673 | 637 | ||
| 674 | ret = btrfs_write_marked_extents(root, dirty_pages, mark); | 638 | ret = btrfs_write_marked_extents(root, dirty_pages, mark); |
| 675 | ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark); | 639 | ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark); |
| 676 | return ret || ret2; | 640 | |
| 641 | if (ret) | ||
| 642 | return ret; | ||
| 643 | if (ret2) | ||
| 644 | return ret2; | ||
| 645 | return 0; | ||
| 677 | } | 646 | } |
| 678 | 647 | ||
| 679 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | 648 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, |
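Taken together, the two rewritten helpers above follow a tag-and-consume protocol: the write pass converts each range's mark bit into EXTENT_NEED_WAIT before handing the range to filemap_fdatawrite_range(), so the wait pass only ever blocks on ranges that actually had I/O submitted, instead of re-walking every page by hand. A condensed sketch of the round trip, reusing the names from the hunks (err/werr bookkeeping trimmed; an illustration, not the committed code):

	u64 start = 0, end;

	/* write pass: start I/O and leave a marker for the wait pass */
	while (!find_first_extent_bit(dirty_pages, start, &start, &end, mark)) {
		convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT,
				   mark, GFP_NOFS);
		filemap_fdatawrite_range(mapping, start, end);
		start = end + 1;
	}

	/* wait pass: consume the marker and block on the submitted I/O */
	start = 0;
	while (!find_first_extent_bit(dirty_pages, start, &start, &end,
				      EXTENT_NEED_WAIT)) {
		clear_extent_bits(dirty_pages, start, end, EXTENT_NEED_WAIT,
				  GFP_NOFS);
		filemap_fdatawait_range(mapping, start, end);
		start = end + 1;
	}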
| @@ -911,10 +880,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 911 | } | 880 | } |
| 912 | 881 | ||
| 913 | btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); | 882 | btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); |
| 914 | btrfs_orphan_pre_snapshot(trans, pending, &to_reserve); | ||
| 915 | 883 | ||
| 916 | if (to_reserve > 0) { | 884 | if (to_reserve > 0) { |
| 917 | ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv, | 885 | ret = btrfs_block_rsv_add(root, &pending->block_rsv, |
| 918 | to_reserve); | 886 | to_reserve); |
| 919 | if (ret) { | 887 | if (ret) { |
| 920 | pending->error = ret; | 888 | pending->error = ret; |
| @@ -1002,7 +970,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 1002 | BUG_ON(IS_ERR(pending->snap)); | 970 | BUG_ON(IS_ERR(pending->snap)); |
| 1003 | 971 | ||
| 1004 | btrfs_reloc_post_snapshot(trans, pending); | 972 | btrfs_reloc_post_snapshot(trans, pending); |
| 1005 | btrfs_orphan_post_snapshot(trans, pending); | ||
| 1006 | fail: | 973 | fail: |
| 1007 | kfree(new_root_item); | 974 | kfree(new_root_item); |
| 1008 | trans->block_rsv = rsv; | 975 | trans->block_rsv = rsv; |
| @@ -1032,7 +999,7 @@ static void update_super_roots(struct btrfs_root *root) | |||
| 1032 | struct btrfs_root_item *root_item; | 999 | struct btrfs_root_item *root_item; |
| 1033 | struct btrfs_super_block *super; | 1000 | struct btrfs_super_block *super; |
| 1034 | 1001 | ||
| 1035 | super = &root->fs_info->super_copy; | 1002 | super = root->fs_info->super_copy; |
| 1036 | 1003 | ||
| 1037 | root_item = &root->fs_info->chunk_root->root_item; | 1004 | root_item = &root->fs_info->chunk_root->root_item; |
| 1038 | super->chunk_root = root_item->bytenr; | 1005 | super->chunk_root = root_item->bytenr; |
| @@ -1043,7 +1010,7 @@ static void update_super_roots(struct btrfs_root *root) | |||
| 1043 | super->root = root_item->bytenr; | 1010 | super->root = root_item->bytenr; |
| 1044 | super->generation = root_item->generation; | 1011 | super->generation = root_item->generation; |
| 1045 | super->root_level = root_item->level; | 1012 | super->root_level = root_item->level; |
| 1046 | if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE)) | 1013 | if (btrfs_test_opt(root, SPACE_CACHE)) |
| 1047 | super->cache_generation = root_item->generation; | 1014 | super->cache_generation = root_item->generation; |
| 1048 | } | 1015 | } |
| 1049 | 1016 | ||
| @@ -1168,14 +1135,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1168 | 1135 | ||
| 1169 | btrfs_run_ordered_operations(root, 0); | 1136 | btrfs_run_ordered_operations(root, 0); |
| 1170 | 1137 | ||
| 1138 | btrfs_trans_release_metadata(trans, root); | ||
| 1139 | trans->block_rsv = NULL; | ||
| 1140 | |||
| 1171 | /* make a pass through all the delayed refs we have so far | 1141 | /* make a pass through all the delayed refs we have so far |
| 1172 | * any running procs may add more while we are here | 1142 | * any running procs may add more while we are here |
| 1173 | */ | 1143 | */ |
| 1174 | ret = btrfs_run_delayed_refs(trans, root, 0); | 1144 | ret = btrfs_run_delayed_refs(trans, root, 0); |
| 1175 | BUG_ON(ret); | 1145 | BUG_ON(ret); |
| 1176 | 1146 | ||
| 1177 | btrfs_trans_release_metadata(trans, root); | ||
| 1178 | |||
| 1179 | cur_trans = trans->transaction; | 1147 | cur_trans = trans->transaction; |
| 1180 | /* | 1148 | /* |
| 1181 | * set the flushing flag so procs in this transaction have to | 1149 | * set the flushing flag so procs in this transaction have to |
| @@ -1341,12 +1309,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1341 | update_super_roots(root); | 1309 | update_super_roots(root); |
| 1342 | 1310 | ||
| 1343 | if (!root->fs_info->log_root_recovering) { | 1311 | if (!root->fs_info->log_root_recovering) { |
| 1344 | btrfs_set_super_log_root(&root->fs_info->super_copy, 0); | 1312 | btrfs_set_super_log_root(root->fs_info->super_copy, 0); |
| 1345 | btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0); | 1313 | btrfs_set_super_log_root_level(root->fs_info->super_copy, 0); |
| 1346 | } | 1314 | } |
| 1347 | 1315 | ||
| 1348 | memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, | 1316 | memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy, |
| 1349 | sizeof(root->fs_info->super_copy)); | 1317 | sizeof(*root->fs_info->super_copy)); |
| 1350 | 1318 | ||
| 1351 | trans->transaction->blocked = 0; | 1319 | trans->transaction->blocked = 0; |
| 1352 | spin_lock(&root->fs_info->trans_lock); | 1320 | spin_lock(&root->fs_info->trans_lock); |
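Throughout this series fs_info->super_copy and fs_info->super_for_commit change from embedded structs to pointers, which is why the memcpy above now takes sizeof(*root->fs_info->super_copy). A reduced illustration of the trap the dereference avoids (a sketch, not the committed code):

	struct btrfs_super_block *src = fs_info->super_copy;
	struct btrfs_super_block *dst = fs_info->super_for_commit;

	memcpy(dst, src, sizeof(*src));	/* copies the whole superblock */
	/*
	 * sizeof(src) would now be pointer-width (8 bytes on 64-bit):
	 * correct back when super_copy was an embedded struct, silently
	 * truncating the copy once it became a pointer.
	 */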
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 0618aa39740b..3568374d419d 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
| @@ -276,8 +276,9 @@ static int process_one_buffer(struct btrfs_root *log, | |||
| 276 | struct walk_control *wc, u64 gen) | 276 | struct walk_control *wc, u64 gen) |
| 277 | { | 277 | { |
| 278 | if (wc->pin) | 278 | if (wc->pin) |
| 279 | btrfs_pin_extent(log->fs_info->extent_root, | 279 | btrfs_pin_extent_for_log_replay(wc->trans, |
| 280 | eb->start, eb->len, 0); | 280 | log->fs_info->extent_root, |
| 281 | eb->start, eb->len); | ||
| 281 | 282 | ||
| 282 | if (btrfs_buffer_uptodate(eb, gen)) { | 283 | if (btrfs_buffer_uptodate(eb, gen)) { |
| 283 | if (wc->write) | 284 | if (wc->write) |
| @@ -1760,7 +1761,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
| 1760 | 1761 | ||
| 1761 | WARN_ON(root_owner != | 1762 | WARN_ON(root_owner != |
| 1762 | BTRFS_TREE_LOG_OBJECTID); | 1763 | BTRFS_TREE_LOG_OBJECTID); |
| 1763 | ret = btrfs_free_reserved_extent(root, | 1764 | ret = btrfs_free_and_pin_reserved_extent(root, |
| 1764 | bytenr, blocksize); | 1765 | bytenr, blocksize); |
| 1765 | BUG_ON(ret); | 1766 | BUG_ON(ret); |
| 1766 | } | 1767 | } |
| @@ -1828,7 +1829,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, | |||
| 1828 | btrfs_tree_unlock(next); | 1829 | btrfs_tree_unlock(next); |
| 1829 | 1830 | ||
| 1830 | WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); | 1831 | WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); |
| 1831 | ret = btrfs_free_reserved_extent(root, | 1832 | ret = btrfs_free_and_pin_reserved_extent(root, |
| 1832 | path->nodes[*level]->start, | 1833 | path->nodes[*level]->start, |
| 1833 | path->nodes[*level]->len); | 1834 | path->nodes[*level]->len); |
| 1834 | BUG_ON(ret); | 1835 | BUG_ON(ret); |
| @@ -1897,7 +1898,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, | |||
| 1897 | 1898 | ||
| 1898 | WARN_ON(log->root_key.objectid != | 1899 | WARN_ON(log->root_key.objectid != |
| 1899 | BTRFS_TREE_LOG_OBJECTID); | 1900 | BTRFS_TREE_LOG_OBJECTID); |
| 1900 | ret = btrfs_free_reserved_extent(log, next->start, | 1901 | ret = btrfs_free_and_pin_reserved_extent(log, next->start, |
| 1901 | next->len); | 1902 | next->len); |
| 1902 | BUG_ON(ret); | 1903 | BUG_ON(ret); |
| 1903 | } | 1904 | } |
| @@ -2013,10 +2014,10 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2013 | /* wait for previous tree log sync to complete */ | 2014 | /* wait for previous tree log sync to complete */ |
| 2014 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) | 2015 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) |
| 2015 | wait_log_commit(trans, root, root->log_transid - 1); | 2016 | wait_log_commit(trans, root, root->log_transid - 1); |
| 2016 | |||
| 2017 | while (1) { | 2017 | while (1) { |
| 2018 | unsigned long batch = root->log_batch; | 2018 | unsigned long batch = root->log_batch; |
| 2019 | if (root->log_multiple_pids) { | 2019 | /* when we're on an ssd, just kick the log commit out */ |
| 2020 | if (!btrfs_test_opt(root, SSD) && root->log_multiple_pids) { | ||
| 2020 | mutex_unlock(&root->log_mutex); | 2021 | mutex_unlock(&root->log_mutex); |
| 2021 | schedule_timeout_uninterruptible(1); | 2022 | schedule_timeout_uninterruptible(1); |
| 2022 | mutex_lock(&root->log_mutex); | 2023 | mutex_lock(&root->log_mutex); |
| @@ -2117,9 +2118,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2117 | BUG_ON(ret); | 2118 | BUG_ON(ret); |
| 2118 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2119 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
| 2119 | 2120 | ||
| 2120 | btrfs_set_super_log_root(&root->fs_info->super_for_commit, | 2121 | btrfs_set_super_log_root(root->fs_info->super_for_commit, |
| 2121 | log_root_tree->node->start); | 2122 | log_root_tree->node->start); |
| 2122 | btrfs_set_super_log_root_level(&root->fs_info->super_for_commit, | 2123 | btrfs_set_super_log_root_level(root->fs_info->super_for_commit, |
| 2123 | btrfs_header_level(log_root_tree->node)); | 2124 | btrfs_header_level(log_root_tree->node)); |
| 2124 | 2125 | ||
| 2125 | log_root_tree->log_batch = 0; | 2126 | log_root_tree->log_batch = 0; |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f2a4cc79da61..f8e2943101a1 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
| @@ -366,6 +366,14 @@ static noinline int device_list_add(const char *path, | |||
| 366 | } | 366 | } |
| 367 | INIT_LIST_HEAD(&device->dev_alloc_list); | 367 | INIT_LIST_HEAD(&device->dev_alloc_list); |
| 368 | 368 | ||
| 369 | /* init readahead state */ | ||
| 370 | spin_lock_init(&device->reada_lock); | ||
| 371 | device->reada_curr_zone = NULL; | ||
| 372 | atomic_set(&device->reada_in_flight, 0); | ||
| 373 | device->reada_next = 0; | ||
| 374 | INIT_RADIX_TREE(&device->reada_zones, GFP_NOFS & ~__GFP_WAIT); | ||
| 375 | INIT_RADIX_TREE(&device->reada_extents, GFP_NOFS & ~__GFP_WAIT); | ||
| 376 | |||
| 369 | mutex_lock(&fs_devices->device_list_mutex); | 377 | mutex_lock(&fs_devices->device_list_mutex); |
| 370 | list_add_rcu(&device->dev_list, &fs_devices->devices); | 378 | list_add_rcu(&device->dev_list, &fs_devices->devices); |
| 371 | mutex_unlock(&fs_devices->device_list_mutex); | 379 | mutex_unlock(&fs_devices->device_list_mutex); |
| @@ -597,10 +605,8 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
| 597 | set_blocksize(bdev, 4096); | 605 | set_blocksize(bdev, 4096); |
| 598 | 606 | ||
| 599 | bh = btrfs_read_dev_super(bdev); | 607 | bh = btrfs_read_dev_super(bdev); |
| 600 | if (!bh) { | 608 | if (!bh) |
| 601 | ret = -EINVAL; | ||
| 602 | goto error_close; | 609 | goto error_close; |
| 603 | } | ||
| 604 | 610 | ||
| 605 | disk_super = (struct btrfs_super_block *)bh->b_data; | 611 | disk_super = (struct btrfs_super_block *)bh->b_data; |
| 606 | devid = btrfs_stack_device_id(&disk_super->dev_item); | 612 | devid = btrfs_stack_device_id(&disk_super->dev_item); |
| @@ -655,7 +661,7 @@ error: | |||
| 655 | continue; | 661 | continue; |
| 656 | } | 662 | } |
| 657 | if (fs_devices->open_devices == 0) { | 663 | if (fs_devices->open_devices == 0) { |
| 658 | ret = -EIO; | 664 | ret = -EINVAL; |
| 659 | goto out; | 665 | goto out; |
| 660 | } | 666 | } |
| 661 | fs_devices->seeding = seeding; | 667 | fs_devices->seeding = seeding; |
| @@ -1013,8 +1019,13 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, | |||
| 1013 | } | 1019 | } |
| 1014 | BUG_ON(ret); | 1020 | BUG_ON(ret); |
| 1015 | 1021 | ||
| 1016 | if (device->bytes_used > 0) | 1022 | if (device->bytes_used > 0) { |
| 1017 | device->bytes_used -= btrfs_dev_extent_length(leaf, extent); | 1023 | u64 len = btrfs_dev_extent_length(leaf, extent); |
| 1024 | device->bytes_used -= len; | ||
| 1025 | spin_lock(&root->fs_info->free_chunk_lock); | ||
| 1026 | root->fs_info->free_chunk_space += len; | ||
| 1027 | spin_unlock(&root->fs_info->free_chunk_lock); | ||
| 1028 | } | ||
| 1018 | ret = btrfs_del_item(trans, root, path); | 1029 | ret = btrfs_del_item(trans, root, path); |
| 1019 | 1030 | ||
| 1020 | out: | 1031 | out: |
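This hunk is the first of several in volumes.c to maintain the new free_chunk_space counter, and every site uses the same spinlock-guarded read-modify-write since devices are added, removed and resized concurrently. The idiom, reduced to its core (lock and field names as in the hunks; an fs_info in scope is assumed):

	spin_lock(&fs_info->free_chunk_lock);
	fs_info->free_chunk_space += len;	/* -= where space is consumed */
	spin_unlock(&fs_info->free_chunk_lock);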
| @@ -1356,6 +1367,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
| 1356 | if (ret) | 1367 | if (ret) |
| 1357 | goto error_undo; | 1368 | goto error_undo; |
| 1358 | 1369 | ||
| 1370 | spin_lock(&root->fs_info->free_chunk_lock); | ||
| 1371 | root->fs_info->free_chunk_space = device->total_bytes - | ||
| 1372 | device->bytes_used; | ||
| 1373 | spin_unlock(&root->fs_info->free_chunk_lock); | ||
| 1374 | |||
| 1359 | device->in_fs_metadata = 0; | 1375 | device->in_fs_metadata = 0; |
| 1360 | btrfs_scrub_cancel_dev(root, device); | 1376 | btrfs_scrub_cancel_dev(root, device); |
| 1361 | 1377 | ||
| @@ -1387,8 +1403,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
| 1387 | call_rcu(&device->rcu, free_device); | 1403 | call_rcu(&device->rcu, free_device); |
| 1388 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 1404 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); |
| 1389 | 1405 | ||
| 1390 | num_devices = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; | 1406 | num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1; |
| 1391 | btrfs_set_super_num_devices(&root->fs_info->super_copy, num_devices); | 1407 | btrfs_set_super_num_devices(root->fs_info->super_copy, num_devices); |
| 1392 | 1408 | ||
| 1393 | if (cur_devices->open_devices == 0) { | 1409 | if (cur_devices->open_devices == 0) { |
| 1394 | struct btrfs_fs_devices *fs_devices; | 1410 | struct btrfs_fs_devices *fs_devices; |
| @@ -1450,7 +1466,7 @@ static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans, | |||
| 1450 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; | 1466 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; |
| 1451 | struct btrfs_fs_devices *old_devices; | 1467 | struct btrfs_fs_devices *old_devices; |
| 1452 | struct btrfs_fs_devices *seed_devices; | 1468 | struct btrfs_fs_devices *seed_devices; |
| 1453 | struct btrfs_super_block *disk_super = &root->fs_info->super_copy; | 1469 | struct btrfs_super_block *disk_super = root->fs_info->super_copy; |
| 1454 | struct btrfs_device *device; | 1470 | struct btrfs_device *device; |
| 1455 | u64 super_flags; | 1471 | u64 super_flags; |
| 1456 | 1472 | ||
| @@ -1691,15 +1707,19 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
| 1691 | root->fs_info->fs_devices->num_can_discard++; | 1707 | root->fs_info->fs_devices->num_can_discard++; |
| 1692 | root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; | 1708 | root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; |
| 1693 | 1709 | ||
| 1710 | spin_lock(&root->fs_info->free_chunk_lock); | ||
| 1711 | root->fs_info->free_chunk_space += device->total_bytes; | ||
| 1712 | spin_unlock(&root->fs_info->free_chunk_lock); | ||
| 1713 | |||
| 1694 | if (!blk_queue_nonrot(bdev_get_queue(bdev))) | 1714 | if (!blk_queue_nonrot(bdev_get_queue(bdev))) |
| 1695 | root->fs_info->fs_devices->rotating = 1; | 1715 | root->fs_info->fs_devices->rotating = 1; |
| 1696 | 1716 | ||
| 1697 | total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); | 1717 | total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy); |
| 1698 | btrfs_set_super_total_bytes(&root->fs_info->super_copy, | 1718 | btrfs_set_super_total_bytes(root->fs_info->super_copy, |
| 1699 | total_bytes + device->total_bytes); | 1719 | total_bytes + device->total_bytes); |
| 1700 | 1720 | ||
| 1701 | total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy); | 1721 | total_bytes = btrfs_super_num_devices(root->fs_info->super_copy); |
| 1702 | btrfs_set_super_num_devices(&root->fs_info->super_copy, | 1722 | btrfs_set_super_num_devices(root->fs_info->super_copy, |
| 1703 | total_bytes + 1); | 1723 | total_bytes + 1); |
| 1704 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 1724 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); |
| 1705 | 1725 | ||
| @@ -1790,7 +1810,7 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans, | |||
| 1790 | struct btrfs_device *device, u64 new_size) | 1810 | struct btrfs_device *device, u64 new_size) |
| 1791 | { | 1811 | { |
| 1792 | struct btrfs_super_block *super_copy = | 1812 | struct btrfs_super_block *super_copy = |
| 1793 | &device->dev_root->fs_info->super_copy; | 1813 | device->dev_root->fs_info->super_copy; |
| 1794 | u64 old_total = btrfs_super_total_bytes(super_copy); | 1814 | u64 old_total = btrfs_super_total_bytes(super_copy); |
| 1795 | u64 diff = new_size - device->total_bytes; | 1815 | u64 diff = new_size - device->total_bytes; |
| 1796 | 1816 | ||
| @@ -1849,7 +1869,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, | |||
| 1849 | static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 | 1869 | static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 |
| 1850 | chunk_offset) | 1870 | chunk_offset) |
| 1851 | { | 1871 | { |
| 1852 | struct btrfs_super_block *super_copy = &root->fs_info->super_copy; | 1872 | struct btrfs_super_block *super_copy = root->fs_info->super_copy; |
| 1853 | struct btrfs_disk_key *disk_key; | 1873 | struct btrfs_disk_key *disk_key; |
| 1854 | struct btrfs_chunk *chunk; | 1874 | struct btrfs_chunk *chunk; |
| 1855 | u8 *ptr; | 1875 | u8 *ptr; |
| @@ -2175,7 +2195,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
| 2175 | bool retried = false; | 2195 | bool retried = false; |
| 2176 | struct extent_buffer *l; | 2196 | struct extent_buffer *l; |
| 2177 | struct btrfs_key key; | 2197 | struct btrfs_key key; |
| 2178 | struct btrfs_super_block *super_copy = &root->fs_info->super_copy; | 2198 | struct btrfs_super_block *super_copy = root->fs_info->super_copy; |
| 2179 | u64 old_total = btrfs_super_total_bytes(super_copy); | 2199 | u64 old_total = btrfs_super_total_bytes(super_copy); |
| 2180 | u64 old_size = device->total_bytes; | 2200 | u64 old_size = device->total_bytes; |
| 2181 | u64 diff = device->total_bytes - new_size; | 2201 | u64 diff = device->total_bytes - new_size; |
| @@ -2192,8 +2212,12 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
| 2192 | lock_chunks(root); | 2212 | lock_chunks(root); |
| 2193 | 2213 | ||
| 2194 | device->total_bytes = new_size; | 2214 | device->total_bytes = new_size; |
| 2195 | if (device->writeable) | 2215 | if (device->writeable) { |
| 2196 | device->fs_devices->total_rw_bytes -= diff; | 2216 | device->fs_devices->total_rw_bytes -= diff; |
| 2217 | spin_lock(&root->fs_info->free_chunk_lock); | ||
| 2218 | root->fs_info->free_chunk_space -= diff; | ||
| 2219 | spin_unlock(&root->fs_info->free_chunk_lock); | ||
| 2220 | } | ||
| 2197 | unlock_chunks(root); | 2221 | unlock_chunks(root); |
| 2198 | 2222 | ||
| 2199 | again: | 2223 | again: |
| @@ -2257,6 +2281,9 @@ again: | |||
| 2257 | device->total_bytes = old_size; | 2281 | device->total_bytes = old_size; |
| 2258 | if (device->writeable) | 2282 | if (device->writeable) |
| 2259 | device->fs_devices->total_rw_bytes += diff; | 2283 | device->fs_devices->total_rw_bytes += diff; |
| 2284 | spin_lock(&root->fs_info->free_chunk_lock); | ||
| 2285 | root->fs_info->free_chunk_space += diff; | ||
| 2286 | spin_unlock(&root->fs_info->free_chunk_lock); | ||
| 2260 | unlock_chunks(root); | 2287 | unlock_chunks(root); |
| 2261 | goto done; | 2288 | goto done; |
| 2262 | } | 2289 | } |
| @@ -2292,7 +2319,7 @@ static int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, | |||
| 2292 | struct btrfs_key *key, | 2319 | struct btrfs_key *key, |
| 2293 | struct btrfs_chunk *chunk, int item_size) | 2320 | struct btrfs_chunk *chunk, int item_size) |
| 2294 | { | 2321 | { |
| 2295 | struct btrfs_super_block *super_copy = &root->fs_info->super_copy; | 2322 | struct btrfs_super_block *super_copy = root->fs_info->super_copy; |
| 2296 | struct btrfs_disk_key disk_key; | 2323 | struct btrfs_disk_key disk_key; |
| 2297 | u32 array_size; | 2324 | u32 array_size; |
| 2298 | u8 *ptr; | 2325 | u8 *ptr; |
| @@ -2615,6 +2642,11 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, | |||
| 2615 | index++; | 2642 | index++; |
| 2616 | } | 2643 | } |
| 2617 | 2644 | ||
| 2645 | spin_lock(&extent_root->fs_info->free_chunk_lock); | ||
| 2646 | extent_root->fs_info->free_chunk_space -= (stripe_size * | ||
| 2647 | map->num_stripes); | ||
| 2648 | spin_unlock(&extent_root->fs_info->free_chunk_lock); | ||
| 2649 | |||
| 2618 | index = 0; | 2650 | index = 0; |
| 2619 | stripe = &chunk->stripe; | 2651 | stripe = &chunk->stripe; |
| 2620 | while (index < map->num_stripes) { | 2652 | while (index < map->num_stripes) { |
| @@ -2848,7 +2880,7 @@ static int find_live_mirror(struct map_lookup *map, int first, int num, | |||
| 2848 | 2880 | ||
| 2849 | static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | 2881 | static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, |
| 2850 | u64 logical, u64 *length, | 2882 | u64 logical, u64 *length, |
| 2851 | struct btrfs_multi_bio **multi_ret, | 2883 | struct btrfs_bio **bbio_ret, |
| 2852 | int mirror_num) | 2884 | int mirror_num) |
| 2853 | { | 2885 | { |
| 2854 | struct extent_map *em; | 2886 | struct extent_map *em; |
| @@ -2866,18 +2898,18 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
| 2866 | int i; | 2898 | int i; |
| 2867 | int num_stripes; | 2899 | int num_stripes; |
| 2868 | int max_errors = 0; | 2900 | int max_errors = 0; |
| 2869 | struct btrfs_multi_bio *multi = NULL; | 2901 | struct btrfs_bio *bbio = NULL; |
| 2870 | 2902 | ||
| 2871 | if (multi_ret && !(rw & (REQ_WRITE | REQ_DISCARD))) | 2903 | if (bbio_ret && !(rw & (REQ_WRITE | REQ_DISCARD))) |
| 2872 | stripes_allocated = 1; | 2904 | stripes_allocated = 1; |
| 2873 | again: | 2905 | again: |
| 2874 | if (multi_ret) { | 2906 | if (bbio_ret) { |
| 2875 | multi = kzalloc(btrfs_multi_bio_size(stripes_allocated), | 2907 | bbio = kzalloc(btrfs_bio_size(stripes_allocated), |
| 2876 | GFP_NOFS); | 2908 | GFP_NOFS); |
| 2877 | if (!multi) | 2909 | if (!bbio) |
| 2878 | return -ENOMEM; | 2910 | return -ENOMEM; |
| 2879 | 2911 | ||
| 2880 | atomic_set(&multi->error, 0); | 2912 | atomic_set(&bbio->error, 0); |
| 2881 | } | 2913 | } |
| 2882 | 2914 | ||
| 2883 | read_lock(&em_tree->lock); | 2915 | read_lock(&em_tree->lock); |
| @@ -2898,7 +2930,7 @@ again: | |||
| 2898 | if (mirror_num > map->num_stripes) | 2930 | if (mirror_num > map->num_stripes) |
| 2899 | mirror_num = 0; | 2931 | mirror_num = 0; |
| 2900 | 2932 | ||
| 2901 | /* if our multi bio struct is too small, back off and try again */ | 2933 | /* if our btrfs_bio struct is too small, back off and try again */ |
| 2902 | if (rw & REQ_WRITE) { | 2934 | if (rw & REQ_WRITE) { |
| 2903 | if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | | 2935 | if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | |
| 2904 | BTRFS_BLOCK_GROUP_DUP)) { | 2936 | BTRFS_BLOCK_GROUP_DUP)) { |
| @@ -2917,11 +2949,11 @@ again: | |||
| 2917 | stripes_required = map->num_stripes; | 2949 | stripes_required = map->num_stripes; |
| 2918 | } | 2950 | } |
| 2919 | } | 2951 | } |
| 2920 | if (multi_ret && (rw & (REQ_WRITE | REQ_DISCARD)) && | 2952 | if (bbio_ret && (rw & (REQ_WRITE | REQ_DISCARD)) && |
| 2921 | stripes_allocated < stripes_required) { | 2953 | stripes_allocated < stripes_required) { |
| 2922 | stripes_allocated = map->num_stripes; | 2954 | stripes_allocated = map->num_stripes; |
| 2923 | free_extent_map(em); | 2955 | free_extent_map(em); |
| 2924 | kfree(multi); | 2956 | kfree(bbio); |
| 2925 | goto again; | 2957 | goto again; |
| 2926 | } | 2958 | } |
| 2927 | stripe_nr = offset; | 2959 | stripe_nr = offset; |
| @@ -2950,7 +2982,7 @@ again: | |||
| 2950 | *length = em->len - offset; | 2982 | *length = em->len - offset; |
| 2951 | } | 2983 | } |
| 2952 | 2984 | ||
| 2953 | if (!multi_ret) | 2985 | if (!bbio_ret) |
| 2954 | goto out; | 2986 | goto out; |
| 2955 | 2987 | ||
| 2956 | num_stripes = 1; | 2988 | num_stripes = 1; |
| @@ -2975,13 +3007,17 @@ again: | |||
| 2975 | stripe_index = find_live_mirror(map, 0, | 3007 | stripe_index = find_live_mirror(map, 0, |
| 2976 | map->num_stripes, | 3008 | map->num_stripes, |
| 2977 | current->pid % map->num_stripes); | 3009 | current->pid % map->num_stripes); |
| 3010 | mirror_num = stripe_index + 1; | ||
| 2978 | } | 3011 | } |
| 2979 | 3012 | ||
| 2980 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { | 3013 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { |
| 2981 | if (rw & (REQ_WRITE | REQ_DISCARD)) | 3014 | if (rw & (REQ_WRITE | REQ_DISCARD)) { |
| 2982 | num_stripes = map->num_stripes; | 3015 | num_stripes = map->num_stripes; |
| 2983 | else if (mirror_num) | 3016 | } else if (mirror_num) { |
| 2984 | stripe_index = mirror_num - 1; | 3017 | stripe_index = mirror_num - 1; |
| 3018 | } else { | ||
| 3019 | mirror_num = 1; | ||
| 3020 | } | ||
| 2985 | 3021 | ||
| 2986 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { | 3022 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { |
| 2987 | int factor = map->num_stripes / map->sub_stripes; | 3023 | int factor = map->num_stripes / map->sub_stripes; |
| @@ -3001,6 +3037,7 @@ again: | |||
| 3001 | stripe_index = find_live_mirror(map, stripe_index, | 3037 | stripe_index = find_live_mirror(map, stripe_index, |
| 3002 | map->sub_stripes, stripe_index + | 3038 | map->sub_stripes, stripe_index + |
| 3003 | current->pid % map->sub_stripes); | 3039 | current->pid % map->sub_stripes); |
| 3040 | mirror_num = stripe_index + 1; | ||
| 3004 | } | 3041 | } |
| 3005 | } else { | 3042 | } else { |
| 3006 | /* | 3043 | /* |
| @@ -3009,15 +3046,16 @@ again: | |||
| 3009 | * stripe_index is the number of our device in the stripe array | 3046 | * stripe_index is the number of our device in the stripe array |
| 3010 | */ | 3047 | */ |
| 3011 | stripe_index = do_div(stripe_nr, map->num_stripes); | 3048 | stripe_index = do_div(stripe_nr, map->num_stripes); |
| 3049 | mirror_num = stripe_index + 1; | ||
| 3012 | } | 3050 | } |
| 3013 | BUG_ON(stripe_index >= map->num_stripes); | 3051 | BUG_ON(stripe_index >= map->num_stripes); |
| 3014 | 3052 | ||
| 3015 | if (rw & REQ_DISCARD) { | 3053 | if (rw & REQ_DISCARD) { |
| 3016 | for (i = 0; i < num_stripes; i++) { | 3054 | for (i = 0; i < num_stripes; i++) { |
| 3017 | multi->stripes[i].physical = | 3055 | bbio->stripes[i].physical = |
| 3018 | map->stripes[stripe_index].physical + | 3056 | map->stripes[stripe_index].physical + |
| 3019 | stripe_offset + stripe_nr * map->stripe_len; | 3057 | stripe_offset + stripe_nr * map->stripe_len; |
| 3020 | multi->stripes[i].dev = map->stripes[stripe_index].dev; | 3058 | bbio->stripes[i].dev = map->stripes[stripe_index].dev; |
| 3021 | 3059 | ||
| 3022 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) { | 3060 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) { |
| 3023 | u64 stripes; | 3061 | u64 stripes; |
| @@ -3038,16 +3076,16 @@ again: | |||
| 3038 | } | 3076 | } |
| 3039 | stripes = stripe_nr_end - 1 - j; | 3077 | stripes = stripe_nr_end - 1 - j; |
| 3040 | do_div(stripes, map->num_stripes); | 3078 | do_div(stripes, map->num_stripes); |
| 3041 | multi->stripes[i].length = map->stripe_len * | 3079 | bbio->stripes[i].length = map->stripe_len * |
| 3042 | (stripes - stripe_nr + 1); | 3080 | (stripes - stripe_nr + 1); |
| 3043 | 3081 | ||
| 3044 | if (i == 0) { | 3082 | if (i == 0) { |
| 3045 | multi->stripes[i].length -= | 3083 | bbio->stripes[i].length -= |
| 3046 | stripe_offset; | 3084 | stripe_offset; |
| 3047 | stripe_offset = 0; | 3085 | stripe_offset = 0; |
| 3048 | } | 3086 | } |
| 3049 | if (stripe_index == last_stripe) | 3087 | if (stripe_index == last_stripe) |
| 3050 | multi->stripes[i].length -= | 3088 | bbio->stripes[i].length -= |
| 3051 | stripe_end_offset; | 3089 | stripe_end_offset; |
| 3052 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { | 3090 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { |
| 3053 | u64 stripes; | 3091 | u64 stripes; |
| @@ -3072,11 +3110,11 @@ again: | |||
| 3072 | } | 3110 | } |
| 3073 | stripes = stripe_nr_end - 1 - j; | 3111 | stripes = stripe_nr_end - 1 - j; |
| 3074 | do_div(stripes, factor); | 3112 | do_div(stripes, factor); |
| 3075 | multi->stripes[i].length = map->stripe_len * | 3113 | bbio->stripes[i].length = map->stripe_len * |
| 3076 | (stripes - stripe_nr + 1); | 3114 | (stripes - stripe_nr + 1); |
| 3077 | 3115 | ||
| 3078 | if (i < map->sub_stripes) { | 3116 | if (i < map->sub_stripes) { |
| 3079 | multi->stripes[i].length -= | 3117 | bbio->stripes[i].length -= |
| 3080 | stripe_offset; | 3118 | stripe_offset; |
| 3081 | if (i == map->sub_stripes - 1) | 3119 | if (i == map->sub_stripes - 1) |
| 3082 | stripe_offset = 0; | 3120 | stripe_offset = 0; |
| @@ -3084,11 +3122,11 @@ again: | |||
| 3084 | if (stripe_index >= last_stripe && | 3122 | if (stripe_index >= last_stripe && |
| 3085 | stripe_index <= (last_stripe + | 3123 | stripe_index <= (last_stripe + |
| 3086 | map->sub_stripes - 1)) { | 3124 | map->sub_stripes - 1)) { |
| 3087 | multi->stripes[i].length -= | 3125 | bbio->stripes[i].length -= |
| 3088 | stripe_end_offset; | 3126 | stripe_end_offset; |
| 3089 | } | 3127 | } |
| 3090 | } else | 3128 | } else |
| 3091 | multi->stripes[i].length = *length; | 3129 | bbio->stripes[i].length = *length; |
| 3092 | 3130 | ||
| 3093 | stripe_index++; | 3131 | stripe_index++; |
| 3094 | if (stripe_index == map->num_stripes) { | 3132 | if (stripe_index == map->num_stripes) { |
| @@ -3099,19 +3137,20 @@ again: | |||
| 3099 | } | 3137 | } |
| 3100 | } else { | 3138 | } else { |
| 3101 | for (i = 0; i < num_stripes; i++) { | 3139 | for (i = 0; i < num_stripes; i++) { |
| 3102 | multi->stripes[i].physical = | 3140 | bbio->stripes[i].physical = |
| 3103 | map->stripes[stripe_index].physical + | 3141 | map->stripes[stripe_index].physical + |
| 3104 | stripe_offset + | 3142 | stripe_offset + |
| 3105 | stripe_nr * map->stripe_len; | 3143 | stripe_nr * map->stripe_len; |
| 3106 | multi->stripes[i].dev = | 3144 | bbio->stripes[i].dev = |
| 3107 | map->stripes[stripe_index].dev; | 3145 | map->stripes[stripe_index].dev; |
| 3108 | stripe_index++; | 3146 | stripe_index++; |
| 3109 | } | 3147 | } |
| 3110 | } | 3148 | } |
| 3111 | if (multi_ret) { | 3149 | if (bbio_ret) { |
| 3112 | *multi_ret = multi; | 3150 | *bbio_ret = bbio; |
| 3113 | multi->num_stripes = num_stripes; | 3151 | bbio->num_stripes = num_stripes; |
| 3114 | multi->max_errors = max_errors; | 3152 | bbio->max_errors = max_errors; |
| 3153 | bbio->mirror_num = mirror_num; | ||
| 3115 | } | 3154 | } |
| 3116 | out: | 3155 | out: |
| 3117 | free_extent_map(em); | 3156 | free_extent_map(em); |
| @@ -3120,9 +3159,9 @@ out: | |||
| 3120 | 3159 | ||
| 3121 | int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | 3160 | int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, |
| 3122 | u64 logical, u64 *length, | 3161 | u64 logical, u64 *length, |
| 3123 | struct btrfs_multi_bio **multi_ret, int mirror_num) | 3162 | struct btrfs_bio **bbio_ret, int mirror_num) |
| 3124 | { | 3163 | { |
| 3125 | return __btrfs_map_block(map_tree, rw, logical, length, multi_ret, | 3164 | return __btrfs_map_block(map_tree, rw, logical, length, bbio_ret, |
| 3126 | mirror_num); | 3165 | mirror_num); |
| 3127 | } | 3166 | } |
| 3128 | 3167 | ||
| @@ -3191,28 +3230,30 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | |||
| 3191 | return 0; | 3230 | return 0; |
| 3192 | } | 3231 | } |
| 3193 | 3232 | ||
| 3194 | static void end_bio_multi_stripe(struct bio *bio, int err) | 3233 | static void btrfs_end_bio(struct bio *bio, int err) |
| 3195 | { | 3234 | { |
| 3196 | struct btrfs_multi_bio *multi = bio->bi_private; | 3235 | struct btrfs_bio *bbio = bio->bi_private; |
| 3197 | int is_orig_bio = 0; | 3236 | int is_orig_bio = 0; |
| 3198 | 3237 | ||
| 3199 | if (err) | 3238 | if (err) |
| 3200 | atomic_inc(&multi->error); | 3239 | atomic_inc(&bbio->error); |
| 3201 | 3240 | ||
| 3202 | if (bio == multi->orig_bio) | 3241 | if (bio == bbio->orig_bio) |
| 3203 | is_orig_bio = 1; | 3242 | is_orig_bio = 1; |
| 3204 | 3243 | ||
| 3205 | if (atomic_dec_and_test(&multi->stripes_pending)) { | 3244 | if (atomic_dec_and_test(&bbio->stripes_pending)) { |
| 3206 | if (!is_orig_bio) { | 3245 | if (!is_orig_bio) { |
| 3207 | bio_put(bio); | 3246 | bio_put(bio); |
| 3208 | bio = multi->orig_bio; | 3247 | bio = bbio->orig_bio; |
| 3209 | } | 3248 | } |
| 3210 | bio->bi_private = multi->private; | 3249 | bio->bi_private = bbio->private; |
| 3211 | bio->bi_end_io = multi->end_io; | 3250 | bio->bi_end_io = bbio->end_io; |
| 3251 | bio->bi_bdev = (struct block_device *) | ||
| 3252 | (unsigned long)bbio->mirror_num; | ||
| 3212 | /* only send an error to the higher layers if it is | 3253 | /* only send an error to the higher layers if it is |
| 3213 | * beyond the tolerance of the multi-bio | 3254 | * beyond the tolerance of the multi-bio |
| 3214 | */ | 3255 | */ |
| 3215 | if (atomic_read(&multi->error) > multi->max_errors) { | 3256 | if (atomic_read(&bbio->error) > bbio->max_errors) { |
| 3216 | err = -EIO; | 3257 | err = -EIO; |
| 3217 | } else if (err) { | 3258 | } else if (err) { |
| 3218 | /* | 3259 | /* |
| @@ -3222,7 +3263,7 @@ static void end_bio_multi_stripe(struct bio *bio, int err) | |||
| 3222 | set_bit(BIO_UPTODATE, &bio->bi_flags); | 3263 | set_bit(BIO_UPTODATE, &bio->bi_flags); |
| 3223 | err = 0; | 3264 | err = 0; |
| 3224 | } | 3265 | } |
| 3225 | kfree(multi); | 3266 | kfree(bbio); |
| 3226 | 3267 | ||
| 3227 | bio_endio(bio, err); | 3268 | bio_endio(bio, err); |
| 3228 | } else if (!is_orig_bio) { | 3269 | } else if (!is_orig_bio) { |
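btrfs_end_bio() above reuses bio->bi_bdev, which is dead once the bio completes, to smuggle the winning mirror number back to the original end_io handler. A hypothetical consumer, matching the cast in the hunk; retry_other_mirror() and max_mirrors are illustrative names, not btrfs API:

	static void sketch_end_io(struct bio *bio, int err)
	{
		/* recover the mirror number btrfs_end_bio() stored */
		int mirror_num = (int)(unsigned long)bio->bi_bdev;

		if (err && mirror_num < max_mirrors)
			/* e.g. a read-repair path retries the next copy */
			retry_other_mirror(bio, mirror_num + 1);
	}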
| @@ -3302,20 +3343,20 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
| 3302 | u64 logical = (u64)bio->bi_sector << 9; | 3343 | u64 logical = (u64)bio->bi_sector << 9; |
| 3303 | u64 length = 0; | 3344 | u64 length = 0; |
| 3304 | u64 map_length; | 3345 | u64 map_length; |
| 3305 | struct btrfs_multi_bio *multi = NULL; | ||
| 3306 | int ret; | 3346 | int ret; |
| 3307 | int dev_nr = 0; | 3347 | int dev_nr = 0; |
| 3308 | int total_devs = 1; | 3348 | int total_devs = 1; |
| 3349 | struct btrfs_bio *bbio = NULL; | ||
| 3309 | 3350 | ||
| 3310 | length = bio->bi_size; | 3351 | length = bio->bi_size; |
| 3311 | map_tree = &root->fs_info->mapping_tree; | 3352 | map_tree = &root->fs_info->mapping_tree; |
| 3312 | map_length = length; | 3353 | map_length = length; |
| 3313 | 3354 | ||
| 3314 | ret = btrfs_map_block(map_tree, rw, logical, &map_length, &multi, | 3355 | ret = btrfs_map_block(map_tree, rw, logical, &map_length, &bbio, |
| 3315 | mirror_num); | 3356 | mirror_num); |
| 3316 | BUG_ON(ret); | 3357 | BUG_ON(ret); |
| 3317 | 3358 | ||
| 3318 | total_devs = multi->num_stripes; | 3359 | total_devs = bbio->num_stripes; |
| 3319 | if (map_length < length) { | 3360 | if (map_length < length) { |
| 3320 | printk(KERN_CRIT "mapping failed logical %llu bio len %llu " | 3361 | printk(KERN_CRIT "mapping failed logical %llu bio len %llu " |
| 3321 | "len %llu\n", (unsigned long long)logical, | 3362 | "len %llu\n", (unsigned long long)logical, |
| @@ -3323,25 +3364,28 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
| 3323 | (unsigned long long)map_length); | 3364 | (unsigned long long)map_length); |
| 3324 | BUG(); | 3365 | BUG(); |
| 3325 | } | 3366 | } |
| 3326 | multi->end_io = first_bio->bi_end_io; | 3367 | |
| 3327 | multi->private = first_bio->bi_private; | 3368 | bbio->orig_bio = first_bio; |
| 3328 | multi->orig_bio = first_bio; | 3369 | bbio->private = first_bio->bi_private; |
| 3329 | atomic_set(&multi->stripes_pending, multi->num_stripes); | 3370 | bbio->end_io = first_bio->bi_end_io; |
| 3371 | atomic_set(&bbio->stripes_pending, bbio->num_stripes); | ||
| 3330 | 3372 | ||
| 3331 | while (dev_nr < total_devs) { | 3373 | while (dev_nr < total_devs) { |
| 3332 | if (total_devs > 1) { | 3374 | if (dev_nr < total_devs - 1) { |
| 3333 | if (dev_nr < total_devs - 1) { | 3375 | bio = bio_clone(first_bio, GFP_NOFS); |
| 3334 | bio = bio_clone(first_bio, GFP_NOFS); | 3376 | BUG_ON(!bio); |
| 3335 | BUG_ON(!bio); | 3377 | } else { |
| 3336 | } else { | 3378 | bio = first_bio; |
| 3337 | bio = first_bio; | ||
| 3338 | } | ||
| 3339 | bio->bi_private = multi; | ||
| 3340 | bio->bi_end_io = end_bio_multi_stripe; | ||
| 3341 | } | 3379 | } |
| 3342 | bio->bi_sector = multi->stripes[dev_nr].physical >> 9; | 3380 | bio->bi_private = bbio; |
| 3343 | dev = multi->stripes[dev_nr].dev; | 3381 | bio->bi_end_io = btrfs_end_bio; |
| 3382 | bio->bi_sector = bbio->stripes[dev_nr].physical >> 9; | ||
| 3383 | dev = bbio->stripes[dev_nr].dev; | ||
| 3344 | if (dev && dev->bdev && (rw != WRITE || dev->writeable)) { | 3384 | if (dev && dev->bdev && (rw != WRITE || dev->writeable)) { |
| 3385 | pr_debug("btrfs_map_bio: rw %d, secor=%llu, dev=%lu " | ||
| 3386 | "(%s id %llu), size=%u\n", rw, | ||
| 3387 | (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev, | ||
| 3388 | dev->name, dev->devid, bio->bi_size); | ||
| 3345 | bio->bi_bdev = dev->bdev; | 3389 | bio->bi_bdev = dev->bdev; |
| 3346 | if (async_submit) | 3390 | if (async_submit) |
| 3347 | schedule_bio(root, dev, rw, bio); | 3391 | schedule_bio(root, dev, rw, bio); |
| @@ -3354,8 +3398,6 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
| 3354 | } | 3398 | } |
| 3355 | dev_nr++; | 3399 | dev_nr++; |
| 3356 | } | 3400 | } |
| 3357 | if (total_devs == 1) | ||
| 3358 | kfree(multi); | ||
| 3359 | return 0; | 3401 | return 0; |
| 3360 | } | 3402 | } |
| 3361 | 3403 | ||
| @@ -3616,15 +3658,20 @@ static int read_one_dev(struct btrfs_root *root, | |||
| 3616 | fill_device_from_item(leaf, dev_item, device); | 3658 | fill_device_from_item(leaf, dev_item, device); |
| 3617 | device->dev_root = root->fs_info->dev_root; | 3659 | device->dev_root = root->fs_info->dev_root; |
| 3618 | device->in_fs_metadata = 1; | 3660 | device->in_fs_metadata = 1; |
| 3619 | if (device->writeable) | 3661 | if (device->writeable) { |
| 3620 | device->fs_devices->total_rw_bytes += device->total_bytes; | 3662 | device->fs_devices->total_rw_bytes += device->total_bytes; |
| 3663 | spin_lock(&root->fs_info->free_chunk_lock); | ||
| 3664 | root->fs_info->free_chunk_space += device->total_bytes - | ||
| 3665 | device->bytes_used; | ||
| 3666 | spin_unlock(&root->fs_info->free_chunk_lock); | ||
| 3667 | } | ||
| 3621 | ret = 0; | 3668 | ret = 0; |
| 3622 | return ret; | 3669 | return ret; |
| 3623 | } | 3670 | } |
| 3624 | 3671 | ||
| 3625 | int btrfs_read_sys_array(struct btrfs_root *root) | 3672 | int btrfs_read_sys_array(struct btrfs_root *root) |
| 3626 | { | 3673 | { |
| 3627 | struct btrfs_super_block *super_copy = &root->fs_info->super_copy; | 3674 | struct btrfs_super_block *super_copy = root->fs_info->super_copy; |
| 3628 | struct extent_buffer *sb; | 3675 | struct extent_buffer *sb; |
| 3629 | struct btrfs_disk_key *disk_key; | 3676 | struct btrfs_disk_key *disk_key; |
| 3630 | struct btrfs_chunk *chunk; | 3677 | struct btrfs_chunk *chunk; |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 6d866db4e177..ab5b1c49f352 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
| @@ -92,6 +92,14 @@ struct btrfs_device { | |||
| 92 | struct btrfs_work work; | 92 | struct btrfs_work work; |
| 93 | struct rcu_head rcu; | 93 | struct rcu_head rcu; |
| 94 | struct work_struct rcu_work; | 94 | struct work_struct rcu_work; |
| 95 | |||
| 96 | /* readahead state */ | ||
| 97 | spinlock_t reada_lock; | ||
| 98 | atomic_t reada_in_flight; | ||
| 99 | u64 reada_next; | ||
| 100 | struct reada_zone *reada_curr_zone; | ||
| 101 | struct radix_tree_root reada_zones; | ||
| 102 | struct radix_tree_root reada_extents; | ||
| 95 | }; | 103 | }; |
| 96 | 104 | ||
| 97 | struct btrfs_fs_devices { | 105 | struct btrfs_fs_devices { |
| @@ -136,7 +144,10 @@ struct btrfs_bio_stripe { | |||
| 136 | u64 length; /* only used for discard mappings */ | 144 | u64 length; /* only used for discard mappings */ |
| 137 | }; | 145 | }; |
| 138 | 146 | ||
| 139 | struct btrfs_multi_bio { | 147 | struct btrfs_bio; |
| 148 | typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err); | ||
| 149 | |||
| 150 | struct btrfs_bio { | ||
| 140 | atomic_t stripes_pending; | 151 | atomic_t stripes_pending; |
| 141 | bio_end_io_t *end_io; | 152 | bio_end_io_t *end_io; |
| 142 | struct bio *orig_bio; | 153 | struct bio *orig_bio; |
| @@ -144,6 +155,7 @@ struct btrfs_multi_bio { | |||
| 144 | atomic_t error; | 155 | atomic_t error; |
| 145 | int max_errors; | 156 | int max_errors; |
| 146 | int num_stripes; | 157 | int num_stripes; |
| 158 | int mirror_num; | ||
| 147 | struct btrfs_bio_stripe stripes[]; | 159 | struct btrfs_bio_stripe stripes[]; |
| 148 | }; | 160 | }; |
| 149 | 161 | ||
| @@ -171,7 +183,7 @@ struct map_lookup { | |||
| 171 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, | 183 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, |
| 172 | u64 end, u64 *length); | 184 | u64 end, u64 *length); |
| 173 | 185 | ||
| 174 | #define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ | 186 | #define btrfs_bio_size(n) (sizeof(struct btrfs_bio) + \ |
| 175 | (sizeof(struct btrfs_bio_stripe) * (n))) | 187 | (sizeof(struct btrfs_bio_stripe) * (n))) |
| 176 | 188 | ||
| 177 | int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, | 189 | int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, |
| @@ -180,7 +192,7 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, | |||
| 180 | u64 chunk_offset, u64 start, u64 num_bytes); | 192 | u64 chunk_offset, u64 start, u64 num_bytes); |
| 181 | int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | 193 | int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, |
| 182 | u64 logical, u64 *length, | 194 | u64 logical, u64 *length, |
| 183 | struct btrfs_multi_bio **multi_ret, int mirror_num); | 195 | struct btrfs_bio **bbio_ret, int mirror_num); |
| 184 | int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | 196 | int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, |
| 185 | u64 chunk_start, u64 physical, u64 devid, | 197 | u64 chunk_start, u64 physical, u64 devid, |
| 186 | u64 **logical, int *naddrs, int *stripe_len); | 198 | u64 **logical, int *naddrs, int *stripe_len); |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 426aa464f1af..3848b04e310e 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
| @@ -127,6 +127,17 @@ static int do_setxattr(struct btrfs_trans_handle *trans, | |||
| 127 | again: | 127 | again: |
| 128 | ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode), | 128 | ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode), |
| 129 | name, name_len, value, size); | 129 | name, name_len, value, size); |
| 130 | /* | ||
| 131 | * If we're setting an xattr to a new value but the new value is say | ||
| 132 | * exactly BTRFS_MAX_XATTR_SIZE, we could end up with EOVERFLOW getting | ||
| 133 | * back from split_leaf. This is because it thinks we'll be extending | ||
| 134 | * the existing item size, but we're asking for enough space to add the | ||
| 135 | * item itself. So if we get EOVERFLOW just set ret to EEXIST and let | ||
| 136 | * the rest of the function figure it out. | ||
| 137 | */ | ||
| 138 | if (ret == -EOVERFLOW) | ||
| 139 | ret = -EEXIST; | ||
| 140 | |||
| 130 | if (ret == -EEXIST) { | 141 | if (ret == -EEXIST) { |
| 131 | if (flags & XATTR_CREATE) | 142 | if (flags & XATTR_CREATE) |
| 132 | goto out; | 143 | goto out; |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index c1f063cd1b0c..cf0b1539b321 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
| @@ -645,20 +645,20 @@ int cifs_closedir(struct inode *inode, struct file *file) | |||
| 645 | } | 645 | } |
| 646 | 646 | ||
| 647 | static struct cifsLockInfo * | 647 | static struct cifsLockInfo * |
| 648 | cifs_lock_init(__u64 len, __u64 offset, __u8 type, __u16 netfid) | 648 | cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 netfid) |
| 649 | { | 649 | { |
| 650 | struct cifsLockInfo *li = | 650 | struct cifsLockInfo *lock = |
| 651 | kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL); | 651 | kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL); |
| 652 | if (!li) | 652 | if (!lock) |
| 653 | return li; | 653 | return lock; |
| 654 | li->netfid = netfid; | 654 | lock->offset = offset; |
| 655 | li->offset = offset; | 655 | lock->length = length; |
| 656 | li->length = len; | 656 | lock->type = type; |
| 657 | li->type = type; | 657 | lock->netfid = netfid; |
| 658 | li->pid = current->tgid; | 658 | lock->pid = current->tgid; |
| 659 | INIT_LIST_HEAD(&li->blist); | 659 | INIT_LIST_HEAD(&lock->blist); |
| 660 | init_waitqueue_head(&li->block_q); | 660 | init_waitqueue_head(&lock->block_q); |
| 661 | return li; | 661 | return lock; |
| 662 | } | 662 | } |
| 663 | 663 | ||
| 664 | static void | 664 | static void |
| @@ -672,7 +672,7 @@ cifs_del_lock_waiters(struct cifsLockInfo *lock) | |||
| 672 | } | 672 | } |
| 673 | 673 | ||
| 674 | static bool | 674 | static bool |
| 675 | cifs_find_lock_conflict(struct cifsInodeInfo *cinode, __u64 offset, | 675 | __cifs_find_lock_conflict(struct cifsInodeInfo *cinode, __u64 offset, |
| 676 | __u64 length, __u8 type, __u16 netfid, | 676 | __u64 length, __u8 type, __u16 netfid, |
| 677 | struct cifsLockInfo **conf_lock) | 677 | struct cifsLockInfo **conf_lock) |
| 678 | { | 678 | { |
| @@ -694,6 +694,14 @@ cifs_find_lock_conflict(struct cifsInodeInfo *cinode, __u64 offset, | |||
| 694 | return false; | 694 | return false; |
| 695 | } | 695 | } |
| 696 | 696 | ||
| 697 | static bool | ||
| 698 | cifs_find_lock_conflict(struct cifsInodeInfo *cinode, struct cifsLockInfo *lock, | ||
| 699 | struct cifsLockInfo **conf_lock) | ||
| 700 | { | ||
| 701 | return __cifs_find_lock_conflict(cinode, lock->offset, lock->length, | ||
| 702 | lock->type, lock->netfid, conf_lock); | ||
| 703 | } | ||
| 704 | |||
| 697 | static int | 705 | static int |
| 698 | cifs_lock_test(struct cifsInodeInfo *cinode, __u64 offset, __u64 length, | 706 | cifs_lock_test(struct cifsInodeInfo *cinode, __u64 offset, __u64 length, |
| 699 | __u8 type, __u16 netfid, struct file_lock *flock) | 707 | __u8 type, __u16 netfid, struct file_lock *flock) |
| @@ -704,8 +712,8 @@ cifs_lock_test(struct cifsInodeInfo *cinode, __u64 offset, __u64 length, | |||
| 704 | 712 | ||
| 705 | mutex_lock(&cinode->lock_mutex); | 713 | mutex_lock(&cinode->lock_mutex); |
| 706 | 714 | ||
| 707 | exist = cifs_find_lock_conflict(cinode, offset, length, type, netfid, | 715 | exist = __cifs_find_lock_conflict(cinode, offset, length, type, netfid, |
| 708 | &conf_lock); | 716 | &conf_lock); |
| 709 | if (exist) { | 717 | if (exist) { |
| 710 | flock->fl_start = conf_lock->offset; | 718 | flock->fl_start = conf_lock->offset; |
| 711 | flock->fl_end = conf_lock->offset + conf_lock->length - 1; | 719 | flock->fl_end = conf_lock->offset + conf_lock->length - 1; |
| @@ -723,40 +731,27 @@ cifs_lock_test(struct cifsInodeInfo *cinode, __u64 offset, __u64 length, | |||
| 723 | return rc; | 731 | return rc; |
| 724 | } | 732 | } |
| 725 | 733 | ||
| 726 | static int | 734 | static void |
| 727 | cifs_lock_add(struct cifsInodeInfo *cinode, __u64 len, __u64 offset, | 735 | cifs_lock_add(struct cifsInodeInfo *cinode, struct cifsLockInfo *lock) |
| 728 | __u8 type, __u16 netfid) | ||
| 729 | { | 736 | { |
| 730 | struct cifsLockInfo *li; | ||
| 731 | |||
| 732 | li = cifs_lock_init(len, offset, type, netfid); | ||
| 733 | if (!li) | ||
| 734 | return -ENOMEM; | ||
| 735 | |||
| 736 | mutex_lock(&cinode->lock_mutex); | 737 | mutex_lock(&cinode->lock_mutex); |
| 737 | list_add_tail(&li->llist, &cinode->llist); | 738 | list_add_tail(&lock->llist, &cinode->llist); |
| 738 | mutex_unlock(&cinode->lock_mutex); | 739 | mutex_unlock(&cinode->lock_mutex); |
| 739 | return 0; | ||
| 740 | } | 740 | } |
| 741 | 741 | ||
| 742 | static int | 742 | static int |
| 743 | cifs_lock_add_if(struct cifsInodeInfo *cinode, __u64 offset, __u64 length, | 743 | cifs_lock_add_if(struct cifsInodeInfo *cinode, struct cifsLockInfo *lock, |
| 744 | __u8 type, __u16 netfid, bool wait) | 744 | bool wait) |
| 745 | { | 745 | { |
| 746 | struct cifsLockInfo *lock, *conf_lock; | 746 | struct cifsLockInfo *conf_lock; |
| 747 | bool exist; | 747 | bool exist; |
| 748 | int rc = 0; | 748 | int rc = 0; |
| 749 | 749 | ||
| 750 | lock = cifs_lock_init(length, offset, type, netfid); | ||
| 751 | if (!lock) | ||
| 752 | return -ENOMEM; | ||
| 753 | |||
| 754 | try_again: | 750 | try_again: |
| 755 | exist = false; | 751 | exist = false; |
| 756 | mutex_lock(&cinode->lock_mutex); | 752 | mutex_lock(&cinode->lock_mutex); |
| 757 | 753 | ||
| 758 | exist = cifs_find_lock_conflict(cinode, offset, length, type, netfid, | 754 | exist = cifs_find_lock_conflict(cinode, lock, &conf_lock); |
| 759 | &conf_lock); | ||
| 760 | if (!exist && cinode->can_cache_brlcks) { | 755 | if (!exist && cinode->can_cache_brlcks) { |
| 761 | list_add_tail(&lock->llist, &cinode->llist); | 756 | list_add_tail(&lock->llist, &cinode->llist); |
| 762 | mutex_unlock(&cinode->lock_mutex); | 757 | mutex_unlock(&cinode->lock_mutex); |
| @@ -775,13 +770,10 @@ try_again: | |||
| 775 | (lock->blist.next == &lock->blist)); | 770 | (lock->blist.next == &lock->blist)); |
| 776 | if (!rc) | 771 | if (!rc) |
| 777 | goto try_again; | 772 | goto try_again; |
| 778 | else { | 773 | mutex_lock(&cinode->lock_mutex); |
| 779 | mutex_lock(&cinode->lock_mutex); | 774 | list_del_init(&lock->blist); |
| 780 | list_del_init(&lock->blist); | ||
| 781 | } | ||
| 782 | } | 775 | } |
| 783 | 776 | ||
| 784 | kfree(lock); | ||
| 785 | mutex_unlock(&cinode->lock_mutex); | 777 | mutex_unlock(&cinode->lock_mutex); |
| 786 | return rc; | 778 | return rc; |
| 787 | } | 779 | } |
| @@ -933,7 +925,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) | |||
| 933 | else | 925 | else |
| 934 | type = CIFS_WRLCK; | 926 | type = CIFS_WRLCK; |
| 935 | 927 | ||
| 936 | lck = cifs_lock_init(length, flock->fl_start, type, | 928 | lck = cifs_lock_init(flock->fl_start, length, type, |
| 937 | cfile->netfid); | 929 | cfile->netfid); |
| 938 | if (!lck) { | 930 | if (!lck) { |
| 939 | rc = -ENOMEM; | 931 | rc = -ENOMEM; |
| @@ -1070,14 +1062,12 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u8 type, | |||
| 1070 | if (rc != 0) | 1062 | if (rc != 0) |
| 1071 | cERROR(1, "Error unlocking previously locked " | 1063 | cERROR(1, "Error unlocking previously locked " |
| 1072 | "range %d during test of lock", rc); | 1064 | "range %d during test of lock", rc); |
| 1073 | rc = 0; | 1065 | return 0; |
| 1074 | return rc; | ||
| 1075 | } | 1066 | } |
| 1076 | 1067 | ||
| 1077 | if (type & LOCKING_ANDX_SHARED_LOCK) { | 1068 | if (type & LOCKING_ANDX_SHARED_LOCK) { |
| 1078 | flock->fl_type = F_WRLCK; | 1069 | flock->fl_type = F_WRLCK; |
| 1079 | rc = 0; | 1070 | return 0; |
| 1080 | return rc; | ||
| 1081 | } | 1071 | } |
| 1082 | 1072 | ||
| 1083 | rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, length, | 1073 | rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, length, |
| @@ -1095,8 +1085,7 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u8 type, | |||
| 1095 | } else | 1085 | } else |
| 1096 | flock->fl_type = F_WRLCK; | 1086 | flock->fl_type = F_WRLCK; |
| 1097 | 1087 | ||
| 1098 | rc = 0; | 1088 | return 0; |
| 1099 | return rc; | ||
| 1100 | } | 1089 | } |
| 1101 | 1090 | ||
| 1102 | static void | 1091 | static void |
| @@ -1254,20 +1243,26 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u8 type, | |||
| 1254 | } | 1243 | } |
| 1255 | 1244 | ||
| 1256 | if (lock) { | 1245 | if (lock) { |
| 1257 | rc = cifs_lock_add_if(cinode, flock->fl_start, length, | 1246 | struct cifsLockInfo *lock; |
| 1258 | type, netfid, wait_flag); | 1247 | |
| 1248 | lock = cifs_lock_init(flock->fl_start, length, type, netfid); | ||
| 1249 | if (!lock) | ||
| 1250 | return -ENOMEM; | ||
| 1251 | |||
| 1252 | rc = cifs_lock_add_if(cinode, lock, wait_flag); | ||
| 1259 | if (rc < 0) | 1253 | if (rc < 0) |
| 1260 | return rc; | 1254 | kfree(lock); |
| 1261 | else if (!rc) | 1255 | if (rc <= 0) |
| 1262 | goto out; | 1256 | goto out; |
| 1263 | 1257 | ||
| 1264 | rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, length, | 1258 | rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, length, |
| 1265 | flock->fl_start, 0, 1, type, wait_flag, 0); | 1259 | flock->fl_start, 0, 1, type, wait_flag, 0); |
| 1266 | if (rc == 0) { | 1260 | if (rc) { |
| 1267 | /* For Windows locks we must store them. */ | 1261 | kfree(lock); |
| 1268 | rc = cifs_lock_add(cinode, length, flock->fl_start, | 1262 | goto out; |
| 1269 | type, netfid); | ||
| 1270 | } | 1263 | } |
| 1264 | |||
| 1265 | cifs_lock_add(cinode, lock); | ||
| 1271 | } else if (unlock) | 1266 | } else if (unlock) |
| 1272 | rc = cifs_unlock_range(cfile, flock, xid); | 1267 | rc = cifs_unlock_range(cfile, flock, xid); |
| 1273 | 1268 | ||
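The rework above gives cifsLockInfo a single-owner lifecycle: the caller allocates with cifs_lock_init(), cifs_lock_add_if() returns 0 once it has queued the lock locally (brlock caching), a positive value when the lock must go to the server, or a negative error, and only a successful CIFSSMBLock() hands ownership to cifs_lock_add(). Condensed from the cifs_setlk() hunk (return-code meanings inferred from the callers shown here):

	struct cifsLockInfo *lock;
	int rc;

	lock = cifs_lock_init(flock->fl_start, length, type, netfid);
	if (!lock)
		return -ENOMEM;

	rc = cifs_lock_add_if(cinode, lock, wait_flag);
	if (rc < 0)
		kfree(lock);		/* conflict/error: still ours to free */
	if (rc <= 0)
		goto out;		/* rc == 0: cached locally, done */

	rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, length,
			 flock->fl_start, 0, 1, type, wait_flag, 0);
	if (rc) {
		kfree(lock);		/* server refused: drop it */
		goto out;
	}
	cifs_lock_add(cinode, lock);	/* server granted: record it */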
diff --git a/fs/dcache.c b/fs/dcache.c index 274f13e2f094..a901c6901bce 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
| @@ -546,9 +546,11 @@ int d_invalidate(struct dentry * dentry) | |||
| 546 | * would make it unreachable from the root, | 546 | * would make it unreachable from the root, |
| 547 | * we might still populate it if it was a | 547 | * we might still populate it if it was a |
| 548 | * working directory or similar). | 548 | * working directory or similar). |
| 549 | * We also need to leave mountpoints alone, | ||
| 550 | * directory or not. | ||
| 549 | */ | 551 | */ |
| 550 | if (dentry->d_count > 1) { | 552 | if (dentry->d_count > 1 && dentry->d_inode) { |
| 551 | if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) { | 553 | if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) { |
| 552 | spin_unlock(&dentry->d_lock); | 554 | spin_unlock(&dentry->d_lock); |
| 553 | return -EBUSY; | 555 | return -EBUSY; |
| 554 | } | 556 | } |
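The d_invalidate() hunk tightens the busy test: a dentry still in use is now left alone not only when it is a directory but also when it is a mountpoint, and the d_inode check is hoisted out of the inner condition. A boolean restatement of the new predicate, with hypothetical field names standing in for the dentry internals:

#include <stdbool.h>

struct dentry_like { int count; bool has_inode, is_dir, is_mountpoint; };

/* True when invalidation must be refused (-EBUSY in the kernel): the
 * dentry is in active use AND is a directory or a mountpoint. */
static bool must_refuse_invalidate(const struct dentry_like *d)
{
        return d->count > 1 && d->has_inode &&
               (d->is_dir || d->is_mountpoint);
}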
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c index de4247021d25..5b6c9d1a2fb9 100644 --- a/fs/jffs2/compr.c +++ b/fs/jffs2/compr.c | |||
| @@ -53,6 +53,78 @@ static int jffs2_is_best_compression(struct jffs2_compressor *this, | |||
| 53 | return 0; | 53 | return 0; |
| 54 | } | 54 | } |
| 55 | 55 | ||
| 56 | /* | ||
| 57 | * jffs2_selected_compress: | ||
| 58 | * @compr: Explicit compression type to use (ie, JFFS2_COMPR_ZLIB). | ||
| 59 | * If 0, just take the first available compression mode. | ||
| 60 | * @data_in: Pointer to uncompressed data | ||
| 61 | * @cpage_out: Pointer to returned pointer to buffer for compressed data | ||
| 62 | * @datalen: On entry, holds the amount of data available for compression. | ||
| 63 | * On exit, expected to hold the amount of data actually compressed. | ||
| 64 | * @cdatalen: On entry, holds the amount of space available for compressed | ||
| 65 | * data. On exit, expected to hold the actual size of the compressed | ||
| 66 | * data. | ||
| 67 | * | ||
| 68 | * Returns: the compression type used. Zero is used to show that the data | ||
| 69 | * could not be compressed; probably because we couldn't find the requested | ||
| 70 | * compression mode. | ||
| 71 | */ | ||
| 72 | static int jffs2_selected_compress(u8 compr, unsigned char *data_in, | ||
| 73 | unsigned char **cpage_out, u32 *datalen, u32 *cdatalen) | ||
| 74 | { | ||
| 75 | struct jffs2_compressor *this; | ||
| 76 | int err, ret = JFFS2_COMPR_NONE; | ||
| 77 | uint32_t orig_slen, orig_dlen; | ||
| 78 | char *output_buf; | ||
| 79 | |||
| 80 | output_buf = kmalloc(*cdatalen, GFP_KERNEL); | ||
| 81 | if (!output_buf) { | ||
| 82 | printk(KERN_WARNING "JFFS2: No memory for compressor allocation. Compression failed.\n"); | ||
| 83 | return ret; | ||
| 84 | } | ||
| 85 | orig_slen = *datalen; | ||
| 86 | orig_dlen = *cdatalen; | ||
| 87 | spin_lock(&jffs2_compressor_list_lock); | ||
| 88 | list_for_each_entry(this, &jffs2_compressor_list, list) { | ||
| 89 | /* Skip decompress-only and disabled modules */ | ||
| 90 | if (!this->compress || this->disabled) | ||
| 91 | continue; | ||
| 92 | |||
| 93 | /* Skip if not the desired compression type */ | ||
| 94 | if (compr && (compr != this->compr)) | ||
| 95 | continue; | ||
| 96 | |||
| 97 | /* | ||
| 98 | * Either compression type was unspecified, or we found our | ||
| 99 | * compressor; either way, we're good to go. | ||
| 100 | */ | ||
| 101 | this->usecount++; | ||
| 102 | spin_unlock(&jffs2_compressor_list_lock); | ||
| 103 | |||
| 104 | *datalen = orig_slen; | ||
| 105 | *cdatalen = orig_dlen; | ||
| 106 | err = this->compress(data_in, output_buf, datalen, cdatalen); | ||
| 107 | |||
| 108 | spin_lock(&jffs2_compressor_list_lock); | ||
| 109 | this->usecount--; | ||
| 110 | if (!err) { | ||
| 111 | /* Success */ | ||
| 112 | ret = this->compr; | ||
| 113 | this->stat_compr_blocks++; | ||
| 114 | this->stat_compr_orig_size += *datalen; | ||
| 115 | this->stat_compr_new_size += *cdatalen; | ||
| 116 | break; | ||
| 117 | } | ||
| 118 | } | ||
| 119 | spin_unlock(&jffs2_compressor_list_lock); | ||
| 120 | if (ret == JFFS2_COMPR_NONE) | ||
| 121 | kfree(output_buf); | ||
| 122 | else | ||
| 123 | *cpage_out = output_buf; | ||
| 124 | |||
| 125 | return ret; | ||
| 126 | } | ||
| 127 | |||
| 56 | /* jffs2_compress: | 128 | /* jffs2_compress: |
| 57 | * @data_in: Pointer to uncompressed data | 129 | * @data_in: Pointer to uncompressed data |
| 58 | * @cpage_out: Pointer to returned pointer to buffer for compressed data | 130 | * @cpage_out: Pointer to returned pointer to buffer for compressed data |
| @@ -76,47 +148,23 @@ uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f, | |||
| 76 | uint32_t *datalen, uint32_t *cdatalen) | 148 | uint32_t *datalen, uint32_t *cdatalen) |
| 77 | { | 149 | { |
| 78 | int ret = JFFS2_COMPR_NONE; | 150 | int ret = JFFS2_COMPR_NONE; |
| 79 | int compr_ret; | 151 | int mode, compr_ret; |
| 80 | struct jffs2_compressor *this, *best=NULL; | 152 | struct jffs2_compressor *this, *best=NULL; |
| 81 | unsigned char *output_buf = NULL, *tmp_buf; | 153 | unsigned char *output_buf = NULL, *tmp_buf; |
| 82 | uint32_t orig_slen, orig_dlen; | 154 | uint32_t orig_slen, orig_dlen; |
| 83 | uint32_t best_slen=0, best_dlen=0; | 155 | uint32_t best_slen=0, best_dlen=0; |
| 84 | 156 | ||
| 85 | switch (jffs2_compression_mode) { | 157 | if (c->mount_opts.override_compr) |
| 158 | mode = c->mount_opts.compr; | ||
| 159 | else | ||
| 160 | mode = jffs2_compression_mode; | ||
| 161 | |||
| 162 | switch (mode) { | ||
| 86 | case JFFS2_COMPR_MODE_NONE: | 163 | case JFFS2_COMPR_MODE_NONE: |
| 87 | break; | 164 | break; |
| 88 | case JFFS2_COMPR_MODE_PRIORITY: | 165 | case JFFS2_COMPR_MODE_PRIORITY: |
| 89 | output_buf = kmalloc(*cdatalen,GFP_KERNEL); | 166 | ret = jffs2_selected_compress(0, data_in, cpage_out, datalen, |
| 90 | if (!output_buf) { | 167 | cdatalen); |
| 91 | printk(KERN_WARNING "JFFS2: No memory for compressor allocation. Compression failed.\n"); | ||
| 92 | goto out; | ||
| 93 | } | ||
| 94 | orig_slen = *datalen; | ||
| 95 | orig_dlen = *cdatalen; | ||
| 96 | spin_lock(&jffs2_compressor_list_lock); | ||
| 97 | list_for_each_entry(this, &jffs2_compressor_list, list) { | ||
| 98 | /* Skip decompress-only backwards-compatibility and disabled modules */ | ||
| 99 | if ((!this->compress)||(this->disabled)) | ||
| 100 | continue; | ||
| 101 | |||
| 102 | this->usecount++; | ||
| 103 | spin_unlock(&jffs2_compressor_list_lock); | ||
| 104 | *datalen = orig_slen; | ||
| 105 | *cdatalen = orig_dlen; | ||
| 106 | compr_ret = this->compress(data_in, output_buf, datalen, cdatalen); | ||
| 107 | spin_lock(&jffs2_compressor_list_lock); | ||
| 108 | this->usecount--; | ||
| 109 | if (!compr_ret) { | ||
| 110 | ret = this->compr; | ||
| 111 | this->stat_compr_blocks++; | ||
| 112 | this->stat_compr_orig_size += *datalen; | ||
| 113 | this->stat_compr_new_size += *cdatalen; | ||
| 114 | break; | ||
| 115 | } | ||
| 116 | } | ||
| 117 | spin_unlock(&jffs2_compressor_list_lock); | ||
| 118 | if (ret == JFFS2_COMPR_NONE) | ||
| 119 | kfree(output_buf); | ||
| 120 | break; | 168 | break; |
| 121 | case JFFS2_COMPR_MODE_SIZE: | 169 | case JFFS2_COMPR_MODE_SIZE: |
| 122 | case JFFS2_COMPR_MODE_FAVOURLZO: | 170 | case JFFS2_COMPR_MODE_FAVOURLZO: |
| @@ -174,22 +222,28 @@ uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f, | |||
| 174 | best->stat_compr_orig_size += best_slen; | 222 | best->stat_compr_orig_size += best_slen; |
| 175 | best->stat_compr_new_size += best_dlen; | 223 | best->stat_compr_new_size += best_dlen; |
| 176 | ret = best->compr; | 224 | ret = best->compr; |
| 225 | *cpage_out = output_buf; | ||
| 177 | } | 226 | } |
| 178 | spin_unlock(&jffs2_compressor_list_lock); | 227 | spin_unlock(&jffs2_compressor_list_lock); |
| 179 | break; | 228 | break; |
| 229 | case JFFS2_COMPR_MODE_FORCELZO: | ||
| 230 | ret = jffs2_selected_compress(JFFS2_COMPR_LZO, data_in, | ||
| 231 | cpage_out, datalen, cdatalen); | ||
| 232 | break; | ||
| 233 | case JFFS2_COMPR_MODE_FORCEZLIB: | ||
| 234 | ret = jffs2_selected_compress(JFFS2_COMPR_ZLIB, data_in, | ||
| 235 | cpage_out, datalen, cdatalen); | ||
| 236 | break; | ||
| 180 | default: | 237 | default: |
| 181 | printk(KERN_ERR "JFFS2: unknown compression mode.\n"); | 238 | printk(KERN_ERR "JFFS2: unknown compression mode.\n"); |
| 182 | } | 239 | } |
| 183 | out: | 240 | |
| 184 | if (ret == JFFS2_COMPR_NONE) { | 241 | if (ret == JFFS2_COMPR_NONE) { |
| 185 | *cpage_out = data_in; | 242 | *cpage_out = data_in; |
| 186 | *datalen = *cdatalen; | 243 | *datalen = *cdatalen; |
| 187 | none_stat_compr_blocks++; | 244 | none_stat_compr_blocks++; |
| 188 | none_stat_compr_size += *datalen; | 245 | none_stat_compr_size += *datalen; |
| 189 | } | 246 | } |
| 190 | else { | ||
| 191 | *cpage_out = output_buf; | ||
| 192 | } | ||
| 193 | return ret; | 247 | return ret; |
| 194 | } | 248 | } |
| 195 | 249 | ||
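The compr.c change factors the old PRIORITY-mode loop into jffs2_selected_compress(), which the new FORCELZO/FORCEZLIB modes reuse by passing an explicit compressor id (0 means "take the first usable one"). A rough userspace sketch of that registry walk, with the kernel's locking, usecount and statistics bookkeeping elided and all names hypothetical:

#include <stdint.h>
#include <stddef.h>

#define COMPR_NONE 0

struct compressor {
        int id;
        int disabled;
        int (*compress)(const uint8_t *in, uint8_t *out,
                        uint32_t *inlen, uint32_t *outlen);
};

/* If want != 0, only that compressor is eligible -- the same convention
 * jffs2_selected_compress() uses for its compr argument. */
static int pick_and_compress(struct compressor *tbl, size_t n, int want,
                             const uint8_t *in, uint8_t *out,
                             uint32_t *inlen, uint32_t *outlen)
{
        for (size_t i = 0; i < n; i++) {
                if (!tbl[i].compress || tbl[i].disabled)
                        continue;       /* decompress-only or disabled */
                if (want && want != tbl[i].id)
                        continue;       /* not the requested type */
                if (!tbl[i].compress(in, out, inlen, outlen))
                        return tbl[i].id;       /* success */
        }
        return COMPR_NONE;              /* nothing could compress the data */
}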
diff --git a/fs/jffs2/compr.h b/fs/jffs2/compr.h index 13bb7597ab39..5e91d578f4ed 100644 --- a/fs/jffs2/compr.h +++ b/fs/jffs2/compr.h | |||
| @@ -40,6 +40,8 @@ | |||
| 40 | #define JFFS2_COMPR_MODE_PRIORITY 1 | 40 | #define JFFS2_COMPR_MODE_PRIORITY 1 |
| 41 | #define JFFS2_COMPR_MODE_SIZE 2 | 41 | #define JFFS2_COMPR_MODE_SIZE 2 |
| 42 | #define JFFS2_COMPR_MODE_FAVOURLZO 3 | 42 | #define JFFS2_COMPR_MODE_FAVOURLZO 3 |
| 43 | #define JFFS2_COMPR_MODE_FORCELZO 4 | ||
| 44 | #define JFFS2_COMPR_MODE_FORCEZLIB 5 | ||
| 43 | 45 | ||
| 44 | #define FAVOUR_LZO_PERCENT 80 | 46 | #define FAVOUR_LZO_PERCENT 80 |
| 45 | 47 | ||
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 7286e44ac665..4b8afe39a87f 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c | |||
| @@ -379,7 +379,7 @@ void jffs2_dirty_inode(struct inode *inode, int flags) | |||
| 379 | jffs2_do_setattr(inode, &iattr); | 379 | jffs2_do_setattr(inode, &iattr); |
| 380 | } | 380 | } |
| 381 | 381 | ||
| 382 | int jffs2_remount_fs (struct super_block *sb, int *flags, char *data) | 382 | int jffs2_do_remount_fs(struct super_block *sb, int *flags, char *data) |
| 383 | { | 383 | { |
| 384 | struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); | 384 | struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); |
| 385 | 385 | ||
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h index 0bc6a6c80a56..55a0c1dceadf 100644 --- a/fs/jffs2/jffs2_fs_sb.h +++ b/fs/jffs2/jffs2_fs_sb.h | |||
| @@ -29,6 +29,11 @@ | |||
| 29 | 29 | ||
| 30 | struct jffs2_inodirty; | 30 | struct jffs2_inodirty; |
| 31 | 31 | ||
| 32 | struct jffs2_mount_opts { | ||
| 33 | bool override_compr; | ||
| 34 | unsigned int compr; | ||
| 35 | }; | ||
| 36 | |||
| 32 | /* A struct for the overall file system control. Pointers to | 37 | /* A struct for the overall file system control. Pointers to |
| 33 | jffs2_sb_info structs are named `c' in the source code. | 38 | jffs2_sb_info structs are named `c' in the source code. |
| 34 | Nee jffs_control | 39 | Nee jffs_control |
| @@ -126,6 +131,7 @@ struct jffs2_sb_info { | |||
| 126 | #endif | 131 | #endif |
| 127 | 132 | ||
| 128 | struct jffs2_summary *summary; /* Summary information */ | 133 | struct jffs2_summary *summary; /* Summary information */ |
| 134 | struct jffs2_mount_opts mount_opts; | ||
| 129 | 135 | ||
| 130 | #ifdef CONFIG_JFFS2_FS_XATTR | 136 | #ifdef CONFIG_JFFS2_FS_XATTR |
| 131 | #define XATTRINDEX_HASHSIZE (57) | 137 | #define XATTRINDEX_HASHSIZE (57) |
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 6c1755c59c0f..ab65ee3ec858 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h | |||
| @@ -176,7 +176,7 @@ void jffs2_dirty_inode(struct inode *inode, int flags); | |||
| 176 | struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, | 176 | struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, |
| 177 | struct jffs2_raw_inode *ri); | 177 | struct jffs2_raw_inode *ri); |
| 178 | int jffs2_statfs (struct dentry *, struct kstatfs *); | 178 | int jffs2_statfs (struct dentry *, struct kstatfs *); |
| 179 | int jffs2_remount_fs (struct super_block *, int *, char *); | 179 | int jffs2_do_remount_fs(struct super_block *, int *, char *); |
| 180 | int jffs2_do_fill_super(struct super_block *sb, void *data, int silent); | 180 | int jffs2_do_fill_super(struct super_block *sb, void *data, int silent); |
| 181 | void jffs2_gc_release_inode(struct jffs2_sb_info *c, | 181 | void jffs2_gc_release_inode(struct jffs2_sb_info *c, |
| 182 | struct jffs2_inode_info *f); | 182 | struct jffs2_inode_info *f); |
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index 8d8cd3419d02..28107ca136e4 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c | |||
| @@ -275,9 +275,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) | |||
| 275 | else | 275 | else |
| 276 | c->mtd->unpoint(c->mtd, 0, c->mtd->size); | 276 | c->mtd->unpoint(c->mtd, 0, c->mtd->size); |
| 277 | #endif | 277 | #endif |
| 278 | if (s) | 278 | kfree(s); |
| 279 | kfree(s); | ||
| 280 | |||
| 281 | return ret; | 279 | return ret; |
| 282 | } | 280 | } |
| 283 | 281 | ||
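The scan.c hunk drops a redundant NULL check: like free(NULL) in userspace, kfree(NULL) is defined to be a no-op, so guarding the call buys nothing. For reference:

#include <stdlib.h>

int main(void)
{
        char *s = NULL;
        free(s);        /* guaranteed no-op by C11 7.22.3.3; kfree(NULL)
                         * behaves the same way in the kernel */
        return 0;
}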
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 853b8e300084..e7e974454115 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c | |||
| @@ -17,11 +17,13 @@ | |||
| 17 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
| 18 | #include <linux/err.h> | 18 | #include <linux/err.h> |
| 19 | #include <linux/mount.h> | 19 | #include <linux/mount.h> |
| 20 | #include <linux/parser.h> | ||
| 20 | #include <linux/jffs2.h> | 21 | #include <linux/jffs2.h> |
| 21 | #include <linux/pagemap.h> | 22 | #include <linux/pagemap.h> |
| 22 | #include <linux/mtd/super.h> | 23 | #include <linux/mtd/super.h> |
| 23 | #include <linux/ctype.h> | 24 | #include <linux/ctype.h> |
| 24 | #include <linux/namei.h> | 25 | #include <linux/namei.h> |
| 26 | #include <linux/seq_file.h> | ||
| 25 | #include <linux/exportfs.h> | 27 | #include <linux/exportfs.h> |
| 26 | #include "compr.h" | 28 | #include "compr.h" |
| 27 | #include "nodelist.h" | 29 | #include "nodelist.h" |
| @@ -75,6 +77,37 @@ static void jffs2_write_super(struct super_block *sb) | |||
| 75 | unlock_super(sb); | 77 | unlock_super(sb); |
| 76 | } | 78 | } |
| 77 | 79 | ||
| 80 | static const char *jffs2_compr_name(unsigned int compr) | ||
| 81 | { | ||
| 82 | switch (compr) { | ||
| 83 | case JFFS2_COMPR_MODE_NONE: | ||
| 84 | return "none"; | ||
| 85 | #ifdef CONFIG_JFFS2_LZO | ||
| 86 | case JFFS2_COMPR_MODE_FORCELZO: | ||
| 87 | return "lzo"; | ||
| 88 | #endif | ||
| 89 | #ifdef CONFIG_JFFS2_ZLIB | ||
| 90 | case JFFS2_COMPR_MODE_FORCEZLIB: | ||
| 91 | return "zlib"; | ||
| 92 | #endif | ||
| 93 | default: | ||
| 94 | /* should never happen; programmer error */ | ||
| 95 | WARN_ON(1); | ||
| 96 | return ""; | ||
| 97 | } | ||
| 98 | } | ||
| 99 | |||
| 100 | static int jffs2_show_options(struct seq_file *s, struct vfsmount *mnt) | ||
| 101 | { | ||
| 102 | struct jffs2_sb_info *c = JFFS2_SB_INFO(mnt->mnt_sb); | ||
| 103 | struct jffs2_mount_opts *opts = &c->mount_opts; | ||
| 104 | |||
| 105 | if (opts->override_compr) | ||
| 106 | seq_printf(s, ",compr=%s", jffs2_compr_name(opts->compr)); | ||
| 107 | |||
| 108 | return 0; | ||
| 109 | } | ||
| 110 | |||
| 78 | static int jffs2_sync_fs(struct super_block *sb, int wait) | 111 | static int jffs2_sync_fs(struct super_block *sb, int wait) |
| 79 | { | 112 | { |
| 80 | struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); | 113 | struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); |
| @@ -133,6 +166,85 @@ static const struct export_operations jffs2_export_ops = { | |||
| 133 | .fh_to_parent = jffs2_fh_to_parent, | 166 | .fh_to_parent = jffs2_fh_to_parent, |
| 134 | }; | 167 | }; |
| 135 | 168 | ||
| 169 | /* | ||
| 170 | * JFFS2 mount options. | ||
| 171 | * | ||
| 172 | * Opt_override_compr: override default compressor | ||
| 173 | * Opt_err: just end of array marker | ||
| 174 | */ | ||
| 175 | enum { | ||
| 176 | Opt_override_compr, | ||
| 177 | Opt_err, | ||
| 178 | }; | ||
| 179 | |||
| 180 | static const match_table_t tokens = { | ||
| 181 | {Opt_override_compr, "compr=%s"}, | ||
| 182 | {Opt_err, NULL}, | ||
| 183 | }; | ||
| 184 | |||
| 185 | static int jffs2_parse_options(struct jffs2_sb_info *c, char *data) | ||
| 186 | { | ||
| 187 | substring_t args[MAX_OPT_ARGS]; | ||
| 188 | char *p, *name; | ||
| 189 | |||
| 190 | if (!data) | ||
| 191 | return 0; | ||
| 192 | |||
| 193 | while ((p = strsep(&data, ","))) { | ||
| 194 | int token; | ||
| 195 | |||
| 196 | if (!*p) | ||
| 197 | continue; | ||
| 198 | |||
| 199 | token = match_token(p, tokens, args); | ||
| 200 | switch (token) { | ||
| 201 | case Opt_override_compr: | ||
| 202 | name = match_strdup(&args[0]); | ||
| 203 | |||
| 204 | if (!name) | ||
| 205 | return -ENOMEM; | ||
| 206 | if (!strcmp(name, "none")) | ||
| 207 | c->mount_opts.compr = JFFS2_COMPR_MODE_NONE; | ||
| 208 | #ifdef CONFIG_JFFS2_LZO | ||
| 209 | else if (!strcmp(name, "lzo")) | ||
| 210 | c->mount_opts.compr = JFFS2_COMPR_MODE_FORCELZO; | ||
| 211 | #endif | ||
| 212 | #ifdef CONFIG_JFFS2_ZLIB | ||
| 213 | else if (!strcmp(name, "zlib")) | ||
| 214 | c->mount_opts.compr = | ||
| 215 | JFFS2_COMPR_MODE_FORCEZLIB; | ||
| 216 | #endif | ||
| 217 | else { | ||
| 218 | printk(KERN_ERR "JFFS2 Error: unknown compressor \"%s\"", | ||
| 219 | name); | ||
| 220 | kfree(name); | ||
| 221 | return -EINVAL; | ||
| 222 | } | ||
| 223 | kfree(name); | ||
| 224 | c->mount_opts.override_compr = true; | ||
| 225 | break; | ||
| 226 | default: | ||
| 227 | printk(KERN_ERR "JFFS2 Error: unrecognized mount option '%s' or missing value\n", | ||
| 228 | p); | ||
| 229 | return -EINVAL; | ||
| 230 | } | ||
| 231 | } | ||
| 232 | |||
| 233 | return 0; | ||
| 234 | } | ||
| 235 | |||
| 236 | static int jffs2_remount_fs(struct super_block *sb, int *flags, char *data) | ||
| 237 | { | ||
| 238 | struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); | ||
| 239 | int err; | ||
| 240 | |||
| 241 | err = jffs2_parse_options(c, data); | ||
| 242 | if (err) | ||
| 243 | return -EINVAL; | ||
| 244 | |||
| 245 | return jffs2_do_remount_fs(sb, flags, data); | ||
| 246 | } | ||
| 247 | |||
| 136 | static const struct super_operations jffs2_super_operations = | 248 | static const struct super_operations jffs2_super_operations = |
| 137 | { | 249 | { |
| 138 | .alloc_inode = jffs2_alloc_inode, | 250 | .alloc_inode = jffs2_alloc_inode, |
| @@ -143,6 +255,7 @@ static const struct super_operations jffs2_super_operations = | |||
| 143 | .remount_fs = jffs2_remount_fs, | 255 | .remount_fs = jffs2_remount_fs, |
| 144 | .evict_inode = jffs2_evict_inode, | 256 | .evict_inode = jffs2_evict_inode, |
| 145 | .dirty_inode = jffs2_dirty_inode, | 257 | .dirty_inode = jffs2_dirty_inode, |
| 258 | .show_options = jffs2_show_options, | ||
| 146 | .sync_fs = jffs2_sync_fs, | 259 | .sync_fs = jffs2_sync_fs, |
| 147 | }; | 260 | }; |
| 148 | 261 | ||
| @@ -166,6 +279,12 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent) | |||
| 166 | c->os_priv = sb; | 279 | c->os_priv = sb; |
| 167 | sb->s_fs_info = c; | 280 | sb->s_fs_info = c; |
| 168 | 281 | ||
| 282 | ret = jffs2_parse_options(c, data); | ||
| 283 | if (ret) { | ||
| 284 | kfree(c); | ||
| 285 | return -EINVAL; | ||
| 286 | } | ||
| 287 | |||
| 169 | /* Initialize JFFS2 superblock locks, the further initialization will | 288 | /* Initialize JFFS2 superblock locks, the further initialization will |
| 170 | * be done later */ | 289 | * be done later */ |
| 171 | mutex_init(&c->alloc_sem); | 290 | mutex_init(&c->alloc_sem); |
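The new jffs2_parse_options() is a standard <linux/parser.h> consumer: strsep() splits the comma-separated option string, match_token() classifies each chunk against the tokens table, and match_strdup() extracts the compressor name. A rough userspace approximation of the same control flow using strsep() alone (function and variable names here are hypothetical):

#define _GNU_SOURCE     /* for strsep() on glibc */
#include <stdio.h>
#include <string.h>

static int parse_opts(char *data, const char **compr_out)
{
        char *p;

        while ((p = strsep(&data, ","))) {
                if (!*p)
                        continue;       /* skip empty chunks, as the kernel does */
                if (!strncmp(p, "compr=", 6)) {
                        const char *name = p + 6;   /* points into the option string */

                        if (!strcmp(name, "none") || !strcmp(name, "lzo") ||
                            !strcmp(name, "zlib")) {
                                *compr_out = name;
                                continue;
                        }
                        fprintf(stderr, "unknown compressor \"%s\"\n", name);
                        return -1;
                }
                fprintf(stderr, "unrecognized option '%s'\n", p);
                return -1;
        }
        return 0;
}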
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index 4515bea0268f..b09e51d2f81f 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c | |||
| @@ -578,8 +578,7 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad) | |||
| 578 | if (!jffs2_is_writebuffered(c)) | 578 | if (!jffs2_is_writebuffered(c)) |
| 579 | return 0; | 579 | return 0; |
| 580 | 580 | ||
| 581 | if (mutex_trylock(&c->alloc_sem)) { | 581 | if (!mutex_is_locked(&c->alloc_sem)) { |
| 582 | mutex_unlock(&c->alloc_sem); | ||
| 583 | printk(KERN_CRIT "jffs2_flush_wbuf() called with alloc_sem not locked!\n"); | 582 | printk(KERN_CRIT "jffs2_flush_wbuf() called with alloc_sem not locked!\n"); |
| 584 | BUG(); | 583 | BUG(); |
| 585 | } | 584 | } |
| @@ -1026,7 +1025,7 @@ int jffs2_check_oob_empty(struct jffs2_sb_info *c, | |||
| 1026 | int cmlen = min_t(int, c->oobavail, OOB_CM_SIZE); | 1025 | int cmlen = min_t(int, c->oobavail, OOB_CM_SIZE); |
| 1027 | struct mtd_oob_ops ops; | 1026 | struct mtd_oob_ops ops; |
| 1028 | 1027 | ||
| 1029 | ops.mode = MTD_OOB_AUTO; | 1028 | ops.mode = MTD_OPS_AUTO_OOB; |
| 1030 | ops.ooblen = NR_OOB_SCAN_PAGES * c->oobavail; | 1029 | ops.ooblen = NR_OOB_SCAN_PAGES * c->oobavail; |
| 1031 | ops.oobbuf = c->oobbuf; | 1030 | ops.oobbuf = c->oobbuf; |
| 1032 | ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0; | 1031 | ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0; |
| @@ -1069,7 +1068,7 @@ int jffs2_check_nand_cleanmarker(struct jffs2_sb_info *c, | |||
| 1069 | struct mtd_oob_ops ops; | 1068 | struct mtd_oob_ops ops; |
| 1070 | int ret, cmlen = min_t(int, c->oobavail, OOB_CM_SIZE); | 1069 | int ret, cmlen = min_t(int, c->oobavail, OOB_CM_SIZE); |
| 1071 | 1070 | ||
| 1072 | ops.mode = MTD_OOB_AUTO; | 1071 | ops.mode = MTD_OPS_AUTO_OOB; |
| 1073 | ops.ooblen = cmlen; | 1072 | ops.ooblen = cmlen; |
| 1074 | ops.oobbuf = c->oobbuf; | 1073 | ops.oobbuf = c->oobbuf; |
| 1075 | ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0; | 1074 | ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0; |
| @@ -1095,7 +1094,7 @@ int jffs2_write_nand_cleanmarker(struct jffs2_sb_info *c, | |||
| 1095 | struct mtd_oob_ops ops; | 1094 | struct mtd_oob_ops ops; |
| 1096 | int cmlen = min_t(int, c->oobavail, OOB_CM_SIZE); | 1095 | int cmlen = min_t(int, c->oobavail, OOB_CM_SIZE); |
| 1097 | 1096 | ||
| 1098 | ops.mode = MTD_OOB_AUTO; | 1097 | ops.mode = MTD_OPS_AUTO_OOB; |
| 1099 | ops.ooblen = cmlen; | 1098 | ops.ooblen = cmlen; |
| 1100 | ops.oobbuf = (uint8_t *)&oob_cleanmarker; | 1099 | ops.oobbuf = (uint8_t *)&oob_cleanmarker; |
| 1101 | ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0; | 1100 | ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0; |
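Besides the mechanical MTD_OOB_AUTO to MTD_OPS_AUTO_OOB renames, the wbuf.c hunk changes how the "alloc_sem must be held" assertion is made: the old code probed with mutex_trylock(), which briefly acquires the mutex when the caller has in fact forgotten to take it; the kernel's mutex_is_locked() checks the state without acquiring. pthreads has no mutex_is_locked() equivalent, so this sketch can only show the old trylock-style probe for contrast:

#include <pthread.h>
#include <assert.h>

static pthread_mutex_t alloc_sem = PTHREAD_MUTEX_INITIALIZER;

/* Old style: if trylock succeeds, nobody held the mutex, so the caller
 * violated the locking rule -- but the probe itself momentarily took the
 * lock, which is exactly what the patch avoids. */
static void assert_caller_holds_lock(void)
{
        if (pthread_mutex_trylock(&alloc_sem) == 0) {
                pthread_mutex_unlock(&alloc_sem);
                assert(!"called without alloc_sem held");
        }
}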
diff --git a/fs/namei.c b/fs/namei.c index ac6d214da827..5008f01787f5 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
| @@ -852,7 +852,7 @@ static int follow_managed(struct path *path, unsigned flags) | |||
| 852 | mntput(path->mnt); | 852 | mntput(path->mnt); |
| 853 | if (ret == -EISDIR) | 853 | if (ret == -EISDIR) |
| 854 | ret = 0; | 854 | ret = 0; |
| 855 | return ret; | 855 | return ret < 0 ? ret : need_mntput; |
| 856 | } | 856 | } |
| 857 | 857 | ||
| 858 | int follow_down_one(struct path *path) | 858 | int follow_down_one(struct path *path) |
| @@ -900,6 +900,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, | |||
| 900 | break; | 900 | break; |
| 901 | path->mnt = mounted; | 901 | path->mnt = mounted; |
| 902 | path->dentry = mounted->mnt_root; | 902 | path->dentry = mounted->mnt_root; |
| 903 | nd->flags |= LOOKUP_JUMPED; | ||
| 903 | nd->seq = read_seqcount_begin(&path->dentry->d_seq); | 904 | nd->seq = read_seqcount_begin(&path->dentry->d_seq); |
| 904 | /* | 905 | /* |
| 905 | * Update the inode too. We don't need to re-check the | 906 | * Update the inode too. We don't need to re-check the |
| @@ -1213,6 +1214,8 @@ retry: | |||
| 1213 | path_put_conditional(path, nd); | 1214 | path_put_conditional(path, nd); |
| 1214 | return err; | 1215 | return err; |
| 1215 | } | 1216 | } |
| 1217 | if (err) | ||
| 1218 | nd->flags |= LOOKUP_JUMPED; | ||
| 1216 | *inode = path->dentry->d_inode; | 1219 | *inode = path->dentry->d_inode; |
| 1217 | return 0; | 1220 | return 0; |
| 1218 | } | 1221 | } |
| @@ -2146,6 +2149,10 @@ static struct file *do_last(struct nameidata *nd, struct path *path, | |||
| 2146 | } | 2149 | } |
| 2147 | 2150 | ||
| 2148 | /* create side of things */ | 2151 | /* create side of things */ |
| 2152 | /* | ||
| 2153 | * This will *only* deal with leaving RCU mode - LOOKUP_JUMPED has been | ||
| 2154 | * cleared when we got to the last component we are about to look up | ||
| 2155 | */ | ||
| 2149 | error = complete_walk(nd); | 2156 | error = complete_walk(nd); |
| 2150 | if (error) | 2157 | if (error) |
| 2151 | return ERR_PTR(error); | 2158 | return ERR_PTR(error); |
| @@ -2214,6 +2221,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path, | |||
| 2214 | if (error < 0) | 2221 | if (error < 0) |
| 2215 | goto exit_dput; | 2222 | goto exit_dput; |
| 2216 | 2223 | ||
| 2224 | if (error) | ||
| 2225 | nd->flags |= LOOKUP_JUMPED; | ||
| 2226 | |||
| 2217 | error = -ENOENT; | 2227 | error = -ENOENT; |
| 2218 | if (!path->dentry->d_inode) | 2228 | if (!path->dentry->d_inode) |
| 2219 | goto exit_dput; | 2229 | goto exit_dput; |
| @@ -2223,6 +2233,10 @@ static struct file *do_last(struct nameidata *nd, struct path *path, | |||
| 2223 | 2233 | ||
| 2224 | path_to_nameidata(path, nd); | 2234 | path_to_nameidata(path, nd); |
| 2225 | nd->inode = path->dentry->d_inode; | 2235 | nd->inode = path->dentry->d_inode; |
| 2236 | /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */ | ||
| 2237 | error = complete_walk(nd); | ||
| 2238 | if (error) | ||
| 2239 | goto exit; | ||
| 2226 | error = -EISDIR; | 2240 | error = -EISDIR; |
| 2227 | if (S_ISDIR(nd->inode->i_mode)) | 2241 | if (S_ISDIR(nd->inode->i_mode)) |
| 2228 | goto exit; | 2242 | goto exit; |
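The follow_managed() change folds two results into a single int: a negative errno on failure, otherwise a boolean need_mntput telling the caller whether a vfsmount reference was taken; the callers use that truth value to set LOOKUP_JUMPED, as the later namei.c hunks show. A minimal sketch of the return convention, with hypothetical names:

#include <stdio.h>

/* negative errno-style value on failure, else 0/1 meaning "caller must
 * drop the extra reference" */
static int do_step(int fail, int grabbed_ref)
{
        if (fail)
                return -1;
        return grabbed_ref;     /* 0 or 1 */
}

int main(void)
{
        int ret = do_step(0, 1);

        if (ret < 0)
                return 1;       /* propagate the error */
        if (ret)
                puts("drop the extra reference here");
        return 0;
}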
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index eef109a1a927..b09ba2dd8b62 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c | |||
| @@ -870,6 +870,22 @@ void dbg_dump_lpt_info(struct ubifs_info *c) | |||
| 870 | spin_unlock(&dbg_lock); | 870 | spin_unlock(&dbg_lock); |
| 871 | } | 871 | } |
| 872 | 872 | ||
| 873 | void dbg_dump_sleb(const struct ubifs_info *c, | ||
| 874 | const struct ubifs_scan_leb *sleb, int offs) | ||
| 875 | { | ||
| 876 | struct ubifs_scan_node *snod; | ||
| 877 | |||
| 878 | printk(KERN_DEBUG "(pid %d) start dumping scanned data from LEB %d:%d\n", | ||
| 879 | current->pid, sleb->lnum, offs); | ||
| 880 | |||
| 881 | list_for_each_entry(snod, &sleb->nodes, list) { | ||
| 882 | cond_resched(); | ||
| 883 | printk(KERN_DEBUG "Dumping node at LEB %d:%d len %d\n", sleb->lnum, | ||
| 884 | snod->offs, snod->len); | ||
| 885 | dbg_dump_node(c, snod->node); | ||
| 886 | } | ||
| 887 | } | ||
| 888 | |||
| 873 | void dbg_dump_leb(const struct ubifs_info *c, int lnum) | 889 | void dbg_dump_leb(const struct ubifs_info *c, int lnum) |
| 874 | { | 890 | { |
| 875 | struct ubifs_scan_leb *sleb; | 891 | struct ubifs_scan_leb *sleb; |
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index feb361e252ac..8d9c46810189 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h | |||
| @@ -269,6 +269,8 @@ void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); | |||
| 269 | void dbg_dump_lprops(struct ubifs_info *c); | 269 | void dbg_dump_lprops(struct ubifs_info *c); |
| 270 | void dbg_dump_lpt_info(struct ubifs_info *c); | 270 | void dbg_dump_lpt_info(struct ubifs_info *c); |
| 271 | void dbg_dump_leb(const struct ubifs_info *c, int lnum); | 271 | void dbg_dump_leb(const struct ubifs_info *c, int lnum); |
| 272 | void dbg_dump_sleb(const struct ubifs_info *c, | ||
| 273 | const struct ubifs_scan_leb *sleb, int offs); | ||
| 272 | void dbg_dump_znode(const struct ubifs_info *c, | 274 | void dbg_dump_znode(const struct ubifs_info *c, |
| 273 | const struct ubifs_znode *znode); | 275 | const struct ubifs_znode *znode); |
| 274 | void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat); | 276 | void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat); |
| @@ -387,6 +389,9 @@ static inline void dbg_dump_lpt_info(struct ubifs_info *c) { return; } | |||
| 387 | static inline void dbg_dump_leb(const struct ubifs_info *c, | 389 | static inline void dbg_dump_leb(const struct ubifs_info *c, |
| 388 | int lnum) { return; } | 390 | int lnum) { return; } |
| 389 | static inline void | 391 | static inline void |
| 392 | dbg_dump_sleb(const struct ubifs_info *c, | ||
| 393 | const struct ubifs_scan_leb *sleb, int offs) { return; } | ||
| 394 | static inline void | ||
| 390 | dbg_dump_znode(const struct ubifs_info *c, | 395 | dbg_dump_znode(const struct ubifs_info *c, |
| 391 | const struct ubifs_znode *znode) { return; } | 396 | const struct ubifs_znode *znode) { return; } |
| 392 | static inline void dbg_dump_heap(struct ubifs_info *c, | 397 | static inline void dbg_dump_heap(struct ubifs_info *c, |
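The debug.h additions follow UBIFS's usual stub pattern: when debugging is compiled in, the header declares the real dbg_dump_sleb(); otherwise it supplies a static inline no-op with the same signature, so call sites never need #ifdef guards. The generic shape, with a placeholder config symbol and type rather than the real UBIFS names:

struct thing;   /* placeholder type */

#ifdef CONFIG_MY_DEBUG
void dbg_dump_thing(const struct thing *t);
#else
static inline void dbg_dump_thing(const struct thing *t) { (void)t; }
#endif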
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index af02790d9328..ee4f43f4bb99 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c | |||
| @@ -983,7 +983,7 @@ int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf) | |||
| 983 | } | 983 | } |
| 984 | 984 | ||
| 985 | /** | 985 | /** |
| 986 | * clean_an_unclean_leb - read and write a LEB to remove corruption. | 986 | * clean_an_unclean_leb - read and write a LEB to remove corruption. |
| 987 | * @c: UBIFS file-system description object | 987 | * @c: UBIFS file-system description object |
| 988 | * @ucleb: unclean LEB information | 988 | * @ucleb: unclean LEB information |
| 989 | * @sbuf: LEB-sized buffer to use | 989 | * @sbuf: LEB-sized buffer to use |
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 93d938ad3d2a..6094c5a5d7a8 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c | |||
| @@ -247,7 +247,7 @@ static int create_default_filesystem(struct ubifs_info *c) | |||
| 247 | mst->total_dirty = cpu_to_le64(tmp64); | 247 | mst->total_dirty = cpu_to_le64(tmp64); |
| 248 | 248 | ||
| 249 | /* The indexing LEB does not contribute to dark space */ | 249 | /* The indexing LEB does not contribute to dark space */ |
| 250 | tmp64 = (c->main_lebs - 1) * c->dark_wm; | 250 | tmp64 = ((long long)(c->main_lebs - 1) * c->dark_wm); |
| 251 | mst->total_dark = cpu_to_le64(tmp64); | 251 | mst->total_dark = cpu_to_le64(tmp64); |
| 252 | 252 | ||
| 253 | mst->total_used = cpu_to_le64(UBIFS_INO_NODE_SZ); | 253 | mst->total_used = cpu_to_le64(UBIFS_INO_NODE_SZ); |
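The sb.c fix is a classic 32-bit multiplication overflow: main_lebs and dark_wm are both ints, so their product was computed in 32 bits and could wrap before being widened into the 64-bit total_dark field; casting one operand to long long first forces a 64-bit multiply. A userspace demonstration (the magnitudes are made up):

#include <stdio.h>

int main(void)
{
        int lebs = 70000, dark_wm = 70000;      /* hypothetical values */

        long long wrong = lebs * dark_wm;             /* 32-bit multiply; wraps
                                                       * (signed overflow is UB) */
        long long right = (long long)lebs * dark_wm;  /* 64-bit multiply */

        printf("wrong=%lld right=%lld\n", wrong, right);
        return 0;
}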
