diff options
author | Chris Mason <chris.mason@fusionio.com> | 2012-07-25 16:11:38 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@fusionio.com> | 2012-07-25 16:11:38 -0400 |
commit | b478b2baa37ac99fc04a30809c780dd5dfd43595 (patch) | |
tree | bed7af1466e5b1e0b0501eba18f77c804a864d7d /fs/btrfs | |
parent | 67c9684f48ea9cbc5e9b8a1feb3151800e9dcc22 (diff) | |
parent | 6f72c7e20dbaea55f04546de69586c84a3654503 (diff) |
Merge branch 'qgroup' of git://git.jan-o-sch.net/btrfs-unstable into for-linus
Conflicts:
fs/btrfs/ioctl.c
fs/btrfs/ioctl.h
fs/btrfs/transaction.c
fs/btrfs/transaction.h
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/Makefile | 2 | ||||
-rw-r--r-- | fs/btrfs/backref.c | 30 | ||||
-rw-r--r-- | fs/btrfs/backref.h | 3 | ||||
-rw-r--r-- | fs/btrfs/ctree.c | 347 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 233 | ||||
-rw-r--r-- | fs/btrfs/delayed-ref.c | 56 | ||||
-rw-r--r-- | fs/btrfs/delayed-ref.h | 62 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 134 | ||||
-rw-r--r-- | fs/btrfs/disk-io.h | 6 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 119 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 244 | ||||
-rw-r--r-- | fs/btrfs/ioctl.h | 62 | ||||
-rw-r--r-- | fs/btrfs/qgroup.c | 1571 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 59 | ||||
-rw-r--r-- | fs/btrfs/transaction.h | 11 |
15 files changed, 2697 insertions, 242 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 0c4fa2befae7..0bc4d3a10a5f 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile | |||
@@ -8,7 +8,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ | |||
8 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ | 8 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ |
9 | export.o tree-log.o free-space-cache.o zlib.o lzo.o \ | 9 | export.o tree-log.o free-space-cache.o zlib.o lzo.o \ |
10 | compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ | 10 | compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ |
11 | reada.o backref.o ulist.o | 11 | reada.o backref.o ulist.o qgroup.o |
12 | 12 | ||
13 | btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o | 13 | btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o |
14 | btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o | 14 | btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o |
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index a383c18e74e8..7d80ddd8f544 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c | |||
@@ -773,9 +773,8 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, | |||
773 | */ | 773 | */ |
774 | static int find_parent_nodes(struct btrfs_trans_handle *trans, | 774 | static int find_parent_nodes(struct btrfs_trans_handle *trans, |
775 | struct btrfs_fs_info *fs_info, u64 bytenr, | 775 | struct btrfs_fs_info *fs_info, u64 bytenr, |
776 | u64 delayed_ref_seq, u64 time_seq, | 776 | u64 time_seq, struct ulist *refs, |
777 | struct ulist *refs, struct ulist *roots, | 777 | struct ulist *roots, const u64 *extent_item_pos) |
778 | const u64 *extent_item_pos) | ||
779 | { | 778 | { |
780 | struct btrfs_key key; | 779 | struct btrfs_key key; |
781 | struct btrfs_path *path; | 780 | struct btrfs_path *path; |
@@ -837,7 +836,7 @@ again: | |||
837 | btrfs_put_delayed_ref(&head->node); | 836 | btrfs_put_delayed_ref(&head->node); |
838 | goto again; | 837 | goto again; |
839 | } | 838 | } |
840 | ret = __add_delayed_refs(head, delayed_ref_seq, | 839 | ret = __add_delayed_refs(head, time_seq, |
841 | &prefs_delayed); | 840 | &prefs_delayed); |
842 | mutex_unlock(&head->mutex); | 841 | mutex_unlock(&head->mutex); |
843 | if (ret) { | 842 | if (ret) { |
@@ -981,8 +980,7 @@ static void free_leaf_list(struct ulist *blocks) | |||
981 | */ | 980 | */ |
982 | static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, | 981 | static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, |
983 | struct btrfs_fs_info *fs_info, u64 bytenr, | 982 | struct btrfs_fs_info *fs_info, u64 bytenr, |
984 | u64 delayed_ref_seq, u64 time_seq, | 983 | u64 time_seq, struct ulist **leafs, |
985 | struct ulist **leafs, | ||
986 | const u64 *extent_item_pos) | 984 | const u64 *extent_item_pos) |
987 | { | 985 | { |
988 | struct ulist *tmp; | 986 | struct ulist *tmp; |
@@ -997,7 +995,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, | |||
997 | return -ENOMEM; | 995 | return -ENOMEM; |
998 | } | 996 | } |
999 | 997 | ||
1000 | ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq, | 998 | ret = find_parent_nodes(trans, fs_info, bytenr, |
1001 | time_seq, *leafs, tmp, extent_item_pos); | 999 | time_seq, *leafs, tmp, extent_item_pos); |
1002 | ulist_free(tmp); | 1000 | ulist_free(tmp); |
1003 | 1001 | ||
@@ -1024,8 +1022,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, | |||
1024 | */ | 1022 | */ |
1025 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, | 1023 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, |
1026 | struct btrfs_fs_info *fs_info, u64 bytenr, | 1024 | struct btrfs_fs_info *fs_info, u64 bytenr, |
1027 | u64 delayed_ref_seq, u64 time_seq, | 1025 | u64 time_seq, struct ulist **roots) |
1028 | struct ulist **roots) | ||
1029 | { | 1026 | { |
1030 | struct ulist *tmp; | 1027 | struct ulist *tmp; |
1031 | struct ulist_node *node = NULL; | 1028 | struct ulist_node *node = NULL; |
@@ -1043,7 +1040,7 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, | |||
1043 | 1040 | ||
1044 | ULIST_ITER_INIT(&uiter); | 1041 | ULIST_ITER_INIT(&uiter); |
1045 | while (1) { | 1042 | while (1) { |
1046 | ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq, | 1043 | ret = find_parent_nodes(trans, fs_info, bytenr, |
1047 | time_seq, tmp, *roots, NULL); | 1044 | time_seq, tmp, *roots, NULL); |
1048 | if (ret < 0 && ret != -ENOENT) { | 1045 | if (ret < 0 && ret != -ENOENT) { |
1049 | ulist_free(tmp); | 1046 | ulist_free(tmp); |
@@ -1376,11 +1373,9 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, | |||
1376 | struct ulist *roots = NULL; | 1373 | struct ulist *roots = NULL; |
1377 | struct ulist_node *ref_node = NULL; | 1374 | struct ulist_node *ref_node = NULL; |
1378 | struct ulist_node *root_node = NULL; | 1375 | struct ulist_node *root_node = NULL; |
1379 | struct seq_list seq_elem = {}; | ||
1380 | struct seq_list tree_mod_seq_elem = {}; | 1376 | struct seq_list tree_mod_seq_elem = {}; |
1381 | struct ulist_iterator ref_uiter; | 1377 | struct ulist_iterator ref_uiter; |
1382 | struct ulist_iterator root_uiter; | 1378 | struct ulist_iterator root_uiter; |
1383 | struct btrfs_delayed_ref_root *delayed_refs = NULL; | ||
1384 | 1379 | ||
1385 | pr_debug("resolving all inodes for extent %llu\n", | 1380 | pr_debug("resolving all inodes for extent %llu\n", |
1386 | extent_item_objectid); | 1381 | extent_item_objectid); |
@@ -1391,16 +1386,11 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, | |||
1391 | trans = btrfs_join_transaction(fs_info->extent_root); | 1386 | trans = btrfs_join_transaction(fs_info->extent_root); |
1392 | if (IS_ERR(trans)) | 1387 | if (IS_ERR(trans)) |
1393 | return PTR_ERR(trans); | 1388 | return PTR_ERR(trans); |
1394 | |||
1395 | delayed_refs = &trans->transaction->delayed_refs; | ||
1396 | spin_lock(&delayed_refs->lock); | ||
1397 | btrfs_get_delayed_seq(delayed_refs, &seq_elem); | ||
1398 | spin_unlock(&delayed_refs->lock); | ||
1399 | btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); | 1389 | btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); |
1400 | } | 1390 | } |
1401 | 1391 | ||
1402 | ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, | 1392 | ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, |
1403 | seq_elem.seq, tree_mod_seq_elem.seq, &refs, | 1393 | tree_mod_seq_elem.seq, &refs, |
1404 | &extent_item_pos); | 1394 | &extent_item_pos); |
1405 | if (ret) | 1395 | if (ret) |
1406 | goto out; | 1396 | goto out; |
@@ -1408,8 +1398,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, | |||
1408 | ULIST_ITER_INIT(&ref_uiter); | 1398 | ULIST_ITER_INIT(&ref_uiter); |
1409 | while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) { | 1399 | while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) { |
1410 | ret = btrfs_find_all_roots(trans, fs_info, ref_node->val, | 1400 | ret = btrfs_find_all_roots(trans, fs_info, ref_node->val, |
1411 | seq_elem.seq, | 1401 | tree_mod_seq_elem.seq, &roots); |
1412 | tree_mod_seq_elem.seq, &roots); | ||
1413 | if (ret) | 1402 | if (ret) |
1414 | break; | 1403 | break; |
1415 | ULIST_ITER_INIT(&root_uiter); | 1404 | ULIST_ITER_INIT(&root_uiter); |
@@ -1431,7 +1420,6 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, | |||
1431 | out: | 1420 | out: |
1432 | if (!search_commit_root) { | 1421 | if (!search_commit_root) { |
1433 | btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); | 1422 | btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); |
1434 | btrfs_put_delayed_seq(delayed_refs, &seq_elem); | ||
1435 | btrfs_end_transaction(trans, fs_info->extent_root); | 1423 | btrfs_end_transaction(trans, fs_info->extent_root); |
1436 | } | 1424 | } |
1437 | 1425 | ||
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index c18d8ac7b795..3a1ad3e2dcb0 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h | |||
@@ -58,8 +58,7 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath); | |||
58 | 58 | ||
59 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, | 59 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, |
60 | struct btrfs_fs_info *fs_info, u64 bytenr, | 60 | struct btrfs_fs_info *fs_info, u64 bytenr, |
61 | u64 delayed_ref_seq, u64 time_seq, | 61 | u64 time_seq, struct ulist **roots); |
62 | struct ulist **roots); | ||
63 | 62 | ||
64 | struct btrfs_data_container *init_data_container(u32 total_bytes); | 63 | struct btrfs_data_container *init_data_container(u32 total_bytes); |
65 | struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, | 64 | struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 67fe46fdee6f..fb21431fe4e0 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -321,7 +321,7 @@ struct tree_mod_root { | |||
321 | struct tree_mod_elem { | 321 | struct tree_mod_elem { |
322 | struct rb_node node; | 322 | struct rb_node node; |
323 | u64 index; /* shifted logical */ | 323 | u64 index; /* shifted logical */ |
324 | struct seq_list elem; | 324 | u64 seq; |
325 | enum mod_log_op op; | 325 | enum mod_log_op op; |
326 | 326 | ||
327 | /* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */ | 327 | /* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */ |
@@ -341,20 +341,50 @@ struct tree_mod_elem { | |||
341 | struct tree_mod_root old_root; | 341 | struct tree_mod_root old_root; |
342 | }; | 342 | }; |
343 | 343 | ||
344 | static inline void | 344 | static inline void tree_mod_log_read_lock(struct btrfs_fs_info *fs_info) |
345 | __get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem) | ||
346 | { | 345 | { |
347 | elem->seq = atomic_inc_return(&fs_info->tree_mod_seq); | 346 | read_lock(&fs_info->tree_mod_log_lock); |
348 | list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); | ||
349 | } | 347 | } |
350 | 348 | ||
351 | void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, | 349 | static inline void tree_mod_log_read_unlock(struct btrfs_fs_info *fs_info) |
352 | struct seq_list *elem) | 350 | { |
351 | read_unlock(&fs_info->tree_mod_log_lock); | ||
352 | } | ||
353 | |||
354 | static inline void tree_mod_log_write_lock(struct btrfs_fs_info *fs_info) | ||
355 | { | ||
356 | write_lock(&fs_info->tree_mod_log_lock); | ||
357 | } | ||
358 | |||
359 | static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info) | ||
353 | { | 360 | { |
354 | elem->flags = 1; | 361 | write_unlock(&fs_info->tree_mod_log_lock); |
362 | } | ||
363 | |||
364 | /* | ||
365 | * This adds a new blocker to the tree mod log's blocker list if the @elem | ||
366 | * passed does not already have a sequence number set. So when a caller expects | ||
367 | * to record tree modifications, it should ensure to set elem->seq to zero | ||
368 | * before calling btrfs_get_tree_mod_seq. | ||
369 | * Returns a fresh, unused tree log modification sequence number, even if no new | ||
370 | * blocker was added. | ||
371 | */ | ||
372 | u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
373 | struct seq_list *elem) | ||
374 | { | ||
375 | u64 seq; | ||
376 | |||
377 | tree_mod_log_write_lock(fs_info); | ||
355 | spin_lock(&fs_info->tree_mod_seq_lock); | 378 | spin_lock(&fs_info->tree_mod_seq_lock); |
356 | __get_tree_mod_seq(fs_info, elem); | 379 | if (!elem->seq) { |
380 | elem->seq = btrfs_inc_tree_mod_seq(fs_info); | ||
381 | list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); | ||
382 | } | ||
383 | seq = btrfs_inc_tree_mod_seq(fs_info); | ||
357 | spin_unlock(&fs_info->tree_mod_seq_lock); | 384 | spin_unlock(&fs_info->tree_mod_seq_lock); |
385 | tree_mod_log_write_unlock(fs_info); | ||
386 | |||
387 | return seq; | ||
358 | } | 388 | } |
359 | 389 | ||
360 | void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, | 390 | void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, |
@@ -371,41 +401,46 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, | |||
371 | if (!seq_putting) | 401 | if (!seq_putting) |
372 | return; | 402 | return; |
373 | 403 | ||
374 | BUG_ON(!(elem->flags & 1)); | ||
375 | spin_lock(&fs_info->tree_mod_seq_lock); | 404 | spin_lock(&fs_info->tree_mod_seq_lock); |
376 | list_del(&elem->list); | 405 | list_del(&elem->list); |
406 | elem->seq = 0; | ||
377 | 407 | ||
378 | list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) { | 408 | list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) { |
379 | if ((cur_elem->flags & 1) && cur_elem->seq < min_seq) { | 409 | if (cur_elem->seq < min_seq) { |
380 | if (seq_putting > cur_elem->seq) { | 410 | if (seq_putting > cur_elem->seq) { |
381 | /* | 411 | /* |
382 | * blocker with lower sequence number exists, we | 412 | * blocker with lower sequence number exists, we |
383 | * cannot remove anything from the log | 413 | * cannot remove anything from the log |
384 | */ | 414 | */ |
385 | goto out; | 415 | spin_unlock(&fs_info->tree_mod_seq_lock); |
416 | return; | ||
386 | } | 417 | } |
387 | min_seq = cur_elem->seq; | 418 | min_seq = cur_elem->seq; |
388 | } | 419 | } |
389 | } | 420 | } |
421 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
422 | |||
423 | /* | ||
424 | * we removed the lowest blocker from the blocker list, so there may be | ||
425 | * more processible delayed refs. | ||
426 | */ | ||
427 | wake_up(&fs_info->tree_mod_seq_wait); | ||
390 | 428 | ||
391 | /* | 429 | /* |
392 | * anything that's lower than the lowest existing (read: blocked) | 430 | * anything that's lower than the lowest existing (read: blocked) |
393 | * sequence number can be removed from the tree. | 431 | * sequence number can be removed from the tree. |
394 | */ | 432 | */ |
395 | write_lock(&fs_info->tree_mod_log_lock); | 433 | tree_mod_log_write_lock(fs_info); |
396 | tm_root = &fs_info->tree_mod_log; | 434 | tm_root = &fs_info->tree_mod_log; |
397 | for (node = rb_first(tm_root); node; node = next) { | 435 | for (node = rb_first(tm_root); node; node = next) { |
398 | next = rb_next(node); | 436 | next = rb_next(node); |
399 | tm = container_of(node, struct tree_mod_elem, node); | 437 | tm = container_of(node, struct tree_mod_elem, node); |
400 | if (tm->elem.seq > min_seq) | 438 | if (tm->seq > min_seq) |
401 | continue; | 439 | continue; |
402 | rb_erase(node, tm_root); | 440 | rb_erase(node, tm_root); |
403 | list_del(&tm->elem.list); | ||
404 | kfree(tm); | 441 | kfree(tm); |
405 | } | 442 | } |
406 | write_unlock(&fs_info->tree_mod_log_lock); | 443 | tree_mod_log_write_unlock(fs_info); |
407 | out: | ||
408 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
409 | } | 444 | } |
410 | 445 | ||
411 | /* | 446 | /* |
@@ -423,11 +458,9 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) | |||
423 | struct rb_node **new; | 458 | struct rb_node **new; |
424 | struct rb_node *parent = NULL; | 459 | struct rb_node *parent = NULL; |
425 | struct tree_mod_elem *cur; | 460 | struct tree_mod_elem *cur; |
426 | int ret = 0; | ||
427 | 461 | ||
428 | BUG_ON(!tm || !tm->elem.seq); | 462 | BUG_ON(!tm || !tm->seq); |
429 | 463 | ||
430 | write_lock(&fs_info->tree_mod_log_lock); | ||
431 | tm_root = &fs_info->tree_mod_log; | 464 | tm_root = &fs_info->tree_mod_log; |
432 | new = &tm_root->rb_node; | 465 | new = &tm_root->rb_node; |
433 | while (*new) { | 466 | while (*new) { |
@@ -437,88 +470,81 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) | |||
437 | new = &((*new)->rb_left); | 470 | new = &((*new)->rb_left); |
438 | else if (cur->index > tm->index) | 471 | else if (cur->index > tm->index) |
439 | new = &((*new)->rb_right); | 472 | new = &((*new)->rb_right); |
440 | else if (cur->elem.seq < tm->elem.seq) | 473 | else if (cur->seq < tm->seq) |
441 | new = &((*new)->rb_left); | 474 | new = &((*new)->rb_left); |
442 | else if (cur->elem.seq > tm->elem.seq) | 475 | else if (cur->seq > tm->seq) |
443 | new = &((*new)->rb_right); | 476 | new = &((*new)->rb_right); |
444 | else { | 477 | else { |
445 | kfree(tm); | 478 | kfree(tm); |
446 | ret = -EEXIST; | 479 | return -EEXIST; |
447 | goto unlock; | ||
448 | } | 480 | } |
449 | } | 481 | } |
450 | 482 | ||
451 | rb_link_node(&tm->node, parent, new); | 483 | rb_link_node(&tm->node, parent, new); |
452 | rb_insert_color(&tm->node, tm_root); | 484 | rb_insert_color(&tm->node, tm_root); |
453 | unlock: | 485 | return 0; |
454 | write_unlock(&fs_info->tree_mod_log_lock); | ||
455 | return ret; | ||
456 | } | 486 | } |
457 | 487 | ||
488 | /* | ||
489 | * Determines if logging can be omitted. Returns 1 if it can. Otherwise, it | ||
490 | * returns zero with the tree_mod_log_lock acquired. The caller must hold | ||
491 | * this until all tree mod log insertions are recorded in the rb tree and then | ||
492 | * call tree_mod_log_write_unlock() to release. | ||
493 | */ | ||
458 | static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, | 494 | static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, |
459 | struct extent_buffer *eb) { | 495 | struct extent_buffer *eb) { |
460 | smp_mb(); | 496 | smp_mb(); |
461 | if (list_empty(&(fs_info)->tree_mod_seq_list)) | 497 | if (list_empty(&(fs_info)->tree_mod_seq_list)) |
462 | return 1; | 498 | return 1; |
463 | if (!eb) | 499 | if (eb && btrfs_header_level(eb) == 0) |
464 | return 0; | 500 | return 1; |
465 | if (btrfs_header_level(eb) == 0) | 501 | |
502 | tree_mod_log_write_lock(fs_info); | ||
503 | if (list_empty(&fs_info->tree_mod_seq_list)) { | ||
504 | /* | ||
505 | * someone emptied the list while we were waiting for the lock. | ||
506 | * we must not add to the list when no blocker exists. | ||
507 | */ | ||
508 | tree_mod_log_write_unlock(fs_info); | ||
466 | return 1; | 509 | return 1; |
510 | } | ||
511 | |||
467 | return 0; | 512 | return 0; |
468 | } | 513 | } |
469 | 514 | ||
470 | /* | 515 | /* |
471 | * This allocates memory and gets a tree modification sequence number when | 516 | * This allocates memory and gets a tree modification sequence number. |
472 | * needed. | ||
473 | * | 517 | * |
474 | * Returns 0 when no sequence number is needed, < 0 on error. | 518 | * Returns <0 on error. |
475 | * Returns 1 when a sequence number was added. In this case, | 519 | * Returns >0 (the added sequence number) on success. |
476 | * fs_info->tree_mod_seq_lock was acquired and must be released by the caller | ||
477 | * after inserting into the rb tree. | ||
478 | */ | 520 | */ |
479 | static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, | 521 | static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, |
480 | struct tree_mod_elem **tm_ret) | 522 | struct tree_mod_elem **tm_ret) |
481 | { | 523 | { |
482 | struct tree_mod_elem *tm; | 524 | struct tree_mod_elem *tm; |
483 | int seq; | ||
484 | 525 | ||
485 | if (tree_mod_dont_log(fs_info, NULL)) | 526 | /* |
486 | return 0; | 527 | * once we switch from spin locks to something different, we should |
487 | 528 | * honor the flags parameter here. | |
488 | tm = *tm_ret = kzalloc(sizeof(*tm), flags); | 529 | */ |
530 | tm = *tm_ret = kzalloc(sizeof(*tm), GFP_ATOMIC); | ||
489 | if (!tm) | 531 | if (!tm) |
490 | return -ENOMEM; | 532 | return -ENOMEM; |
491 | 533 | ||
492 | tm->elem.flags = 0; | 534 | tm->seq = btrfs_inc_tree_mod_seq(fs_info); |
493 | spin_lock(&fs_info->tree_mod_seq_lock); | 535 | return tm->seq; |
494 | if (list_empty(&fs_info->tree_mod_seq_list)) { | ||
495 | /* | ||
496 | * someone emptied the list while we were waiting for the lock. | ||
497 | * we must not add to the list, because no blocker exists. items | ||
498 | * are removed from the list only when the existing blocker is | ||
499 | * removed from the list. | ||
500 | */ | ||
501 | kfree(tm); | ||
502 | seq = 0; | ||
503 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
504 | } else { | ||
505 | __get_tree_mod_seq(fs_info, &tm->elem); | ||
506 | seq = tm->elem.seq; | ||
507 | } | ||
508 | |||
509 | return seq; | ||
510 | } | 536 | } |
511 | 537 | ||
512 | static noinline int | 538 | static inline int |
513 | tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, | 539 | __tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, |
514 | struct extent_buffer *eb, int slot, | 540 | struct extent_buffer *eb, int slot, |
515 | enum mod_log_op op, gfp_t flags) | 541 | enum mod_log_op op, gfp_t flags) |
516 | { | 542 | { |
517 | struct tree_mod_elem *tm; | ||
518 | int ret; | 543 | int ret; |
544 | struct tree_mod_elem *tm; | ||
519 | 545 | ||
520 | ret = tree_mod_alloc(fs_info, flags, &tm); | 546 | ret = tree_mod_alloc(fs_info, flags, &tm); |
521 | if (ret <= 0) | 547 | if (ret < 0) |
522 | return ret; | 548 | return ret; |
523 | 549 | ||
524 | tm->index = eb->start >> PAGE_CACHE_SHIFT; | 550 | tm->index = eb->start >> PAGE_CACHE_SHIFT; |
@@ -530,8 +556,22 @@ tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, | |||
530 | tm->slot = slot; | 556 | tm->slot = slot; |
531 | tm->generation = btrfs_node_ptr_generation(eb, slot); | 557 | tm->generation = btrfs_node_ptr_generation(eb, slot); |
532 | 558 | ||
533 | ret = __tree_mod_log_insert(fs_info, tm); | 559 | return __tree_mod_log_insert(fs_info, tm); |
534 | spin_unlock(&fs_info->tree_mod_seq_lock); | 560 | } |
561 | |||
562 | static noinline int | ||
563 | tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, | ||
564 | struct extent_buffer *eb, int slot, | ||
565 | enum mod_log_op op, gfp_t flags) | ||
566 | { | ||
567 | int ret; | ||
568 | |||
569 | if (tree_mod_dont_log(fs_info, eb)) | ||
570 | return 0; | ||
571 | |||
572 | ret = __tree_mod_log_insert_key(fs_info, eb, slot, op, flags); | ||
573 | |||
574 | tree_mod_log_write_unlock(fs_info); | ||
535 | return ret; | 575 | return ret; |
536 | } | 576 | } |
537 | 577 | ||
@@ -543,6 +583,14 @@ tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, | |||
543 | } | 583 | } |
544 | 584 | ||
545 | static noinline int | 585 | static noinline int |
586 | tree_mod_log_insert_key_locked(struct btrfs_fs_info *fs_info, | ||
587 | struct extent_buffer *eb, int slot, | ||
588 | enum mod_log_op op) | ||
589 | { | ||
590 | return __tree_mod_log_insert_key(fs_info, eb, slot, op, GFP_NOFS); | ||
591 | } | ||
592 | |||
593 | static noinline int | ||
546 | tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, | 594 | tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, |
547 | struct extent_buffer *eb, int dst_slot, int src_slot, | 595 | struct extent_buffer *eb, int dst_slot, int src_slot, |
548 | int nr_items, gfp_t flags) | 596 | int nr_items, gfp_t flags) |
@@ -555,14 +603,14 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, | |||
555 | return 0; | 603 | return 0; |
556 | 604 | ||
557 | for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { | 605 | for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { |
558 | ret = tree_mod_log_insert_key(fs_info, eb, i + dst_slot, | 606 | ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot, |
559 | MOD_LOG_KEY_REMOVE_WHILE_MOVING); | 607 | MOD_LOG_KEY_REMOVE_WHILE_MOVING); |
560 | BUG_ON(ret < 0); | 608 | BUG_ON(ret < 0); |
561 | } | 609 | } |
562 | 610 | ||
563 | ret = tree_mod_alloc(fs_info, flags, &tm); | 611 | ret = tree_mod_alloc(fs_info, flags, &tm); |
564 | if (ret <= 0) | 612 | if (ret < 0) |
565 | return ret; | 613 | goto out; |
566 | 614 | ||
567 | tm->index = eb->start >> PAGE_CACHE_SHIFT; | 615 | tm->index = eb->start >> PAGE_CACHE_SHIFT; |
568 | tm->slot = src_slot; | 616 | tm->slot = src_slot; |
@@ -571,10 +619,26 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, | |||
571 | tm->op = MOD_LOG_MOVE_KEYS; | 619 | tm->op = MOD_LOG_MOVE_KEYS; |
572 | 620 | ||
573 | ret = __tree_mod_log_insert(fs_info, tm); | 621 | ret = __tree_mod_log_insert(fs_info, tm); |
574 | spin_unlock(&fs_info->tree_mod_seq_lock); | 622 | out: |
623 | tree_mod_log_write_unlock(fs_info); | ||
575 | return ret; | 624 | return ret; |
576 | } | 625 | } |
577 | 626 | ||
627 | static inline void | ||
628 | __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) | ||
629 | { | ||
630 | int i; | ||
631 | u32 nritems; | ||
632 | int ret; | ||
633 | |||
634 | nritems = btrfs_header_nritems(eb); | ||
635 | for (i = nritems - 1; i >= 0; i--) { | ||
636 | ret = tree_mod_log_insert_key_locked(fs_info, eb, i, | ||
637 | MOD_LOG_KEY_REMOVE_WHILE_FREEING); | ||
638 | BUG_ON(ret < 0); | ||
639 | } | ||
640 | } | ||
641 | |||
578 | static noinline int | 642 | static noinline int |
579 | tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, | 643 | tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, |
580 | struct extent_buffer *old_root, | 644 | struct extent_buffer *old_root, |
@@ -583,9 +647,14 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, | |||
583 | struct tree_mod_elem *tm; | 647 | struct tree_mod_elem *tm; |
584 | int ret; | 648 | int ret; |
585 | 649 | ||
650 | if (tree_mod_dont_log(fs_info, NULL)) | ||
651 | return 0; | ||
652 | |||
653 | __tree_mod_log_free_eb(fs_info, old_root); | ||
654 | |||
586 | ret = tree_mod_alloc(fs_info, flags, &tm); | 655 | ret = tree_mod_alloc(fs_info, flags, &tm); |
587 | if (ret <= 0) | 656 | if (ret < 0) |
588 | return ret; | 657 | goto out; |
589 | 658 | ||
590 | tm->index = new_root->start >> PAGE_CACHE_SHIFT; | 659 | tm->index = new_root->start >> PAGE_CACHE_SHIFT; |
591 | tm->old_root.logical = old_root->start; | 660 | tm->old_root.logical = old_root->start; |
@@ -594,7 +663,8 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, | |||
594 | tm->op = MOD_LOG_ROOT_REPLACE; | 663 | tm->op = MOD_LOG_ROOT_REPLACE; |
595 | 664 | ||
596 | ret = __tree_mod_log_insert(fs_info, tm); | 665 | ret = __tree_mod_log_insert(fs_info, tm); |
597 | spin_unlock(&fs_info->tree_mod_seq_lock); | 666 | out: |
667 | tree_mod_log_write_unlock(fs_info); | ||
598 | return ret; | 668 | return ret; |
599 | } | 669 | } |
600 | 670 | ||
@@ -608,7 +678,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, | |||
608 | struct tree_mod_elem *found = NULL; | 678 | struct tree_mod_elem *found = NULL; |
609 | u64 index = start >> PAGE_CACHE_SHIFT; | 679 | u64 index = start >> PAGE_CACHE_SHIFT; |
610 | 680 | ||
611 | read_lock(&fs_info->tree_mod_log_lock); | 681 | tree_mod_log_read_lock(fs_info); |
612 | tm_root = &fs_info->tree_mod_log; | 682 | tm_root = &fs_info->tree_mod_log; |
613 | node = tm_root->rb_node; | 683 | node = tm_root->rb_node; |
614 | while (node) { | 684 | while (node) { |
@@ -617,18 +687,18 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, | |||
617 | node = node->rb_left; | 687 | node = node->rb_left; |
618 | } else if (cur->index > index) { | 688 | } else if (cur->index > index) { |
619 | node = node->rb_right; | 689 | node = node->rb_right; |
620 | } else if (cur->elem.seq < min_seq) { | 690 | } else if (cur->seq < min_seq) { |
621 | node = node->rb_left; | 691 | node = node->rb_left; |
622 | } else if (!smallest) { | 692 | } else if (!smallest) { |
623 | /* we want the node with the highest seq */ | 693 | /* we want the node with the highest seq */ |
624 | if (found) | 694 | if (found) |
625 | BUG_ON(found->elem.seq > cur->elem.seq); | 695 | BUG_ON(found->seq > cur->seq); |
626 | found = cur; | 696 | found = cur; |
627 | node = node->rb_left; | 697 | node = node->rb_left; |
628 | } else if (cur->elem.seq > min_seq) { | 698 | } else if (cur->seq > min_seq) { |
629 | /* we want the node with the smallest seq */ | 699 | /* we want the node with the smallest seq */ |
630 | if (found) | 700 | if (found) |
631 | BUG_ON(found->elem.seq < cur->elem.seq); | 701 | BUG_ON(found->seq < cur->seq); |
632 | found = cur; | 702 | found = cur; |
633 | node = node->rb_right; | 703 | node = node->rb_right; |
634 | } else { | 704 | } else { |
@@ -636,7 +706,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, | |||
636 | break; | 706 | break; |
637 | } | 707 | } |
638 | } | 708 | } |
639 | read_unlock(&fs_info->tree_mod_log_lock); | 709 | tree_mod_log_read_unlock(fs_info); |
640 | 710 | ||
641 | return found; | 711 | return found; |
642 | } | 712 | } |
@@ -664,7 +734,7 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq) | |||
664 | return __tree_mod_log_search(fs_info, start, min_seq, 0); | 734 | return __tree_mod_log_search(fs_info, start, min_seq, 0); |
665 | } | 735 | } |
666 | 736 | ||
667 | static inline void | 737 | static noinline void |
668 | tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, | 738 | tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, |
669 | struct extent_buffer *src, unsigned long dst_offset, | 739 | struct extent_buffer *src, unsigned long dst_offset, |
670 | unsigned long src_offset, int nr_items) | 740 | unsigned long src_offset, int nr_items) |
@@ -675,18 +745,23 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, | |||
675 | if (tree_mod_dont_log(fs_info, NULL)) | 745 | if (tree_mod_dont_log(fs_info, NULL)) |
676 | return; | 746 | return; |
677 | 747 | ||
678 | if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) | 748 | if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) { |
749 | tree_mod_log_write_unlock(fs_info); | ||
679 | return; | 750 | return; |
751 | } | ||
680 | 752 | ||
681 | /* speed this up by single seq for all operations? */ | ||
682 | for (i = 0; i < nr_items; i++) { | 753 | for (i = 0; i < nr_items; i++) { |
683 | ret = tree_mod_log_insert_key(fs_info, src, i + src_offset, | 754 | ret = tree_mod_log_insert_key_locked(fs_info, src, |
684 | MOD_LOG_KEY_REMOVE); | 755 | i + src_offset, |
756 | MOD_LOG_KEY_REMOVE); | ||
685 | BUG_ON(ret < 0); | 757 | BUG_ON(ret < 0); |
686 | ret = tree_mod_log_insert_key(fs_info, dst, i + dst_offset, | 758 | ret = tree_mod_log_insert_key_locked(fs_info, dst, |
687 | MOD_LOG_KEY_ADD); | 759 | i + dst_offset, |
760 | MOD_LOG_KEY_ADD); | ||
688 | BUG_ON(ret < 0); | 761 | BUG_ON(ret < 0); |
689 | } | 762 | } |
763 | |||
764 | tree_mod_log_write_unlock(fs_info); | ||
690 | } | 765 | } |
691 | 766 | ||
692 | static inline void | 767 | static inline void |
@@ -699,7 +774,7 @@ tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, | |||
699 | BUG_ON(ret < 0); | 774 | BUG_ON(ret < 0); |
700 | } | 775 | } |
701 | 776 | ||
702 | static inline void | 777 | static noinline void |
703 | tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info, | 778 | tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info, |
704 | struct extent_buffer *eb, | 779 | struct extent_buffer *eb, |
705 | struct btrfs_disk_key *disk_key, int slot, int atomic) | 780 | struct btrfs_disk_key *disk_key, int slot, int atomic) |
@@ -712,30 +787,22 @@ tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info, | |||
712 | BUG_ON(ret < 0); | 787 | BUG_ON(ret < 0); |
713 | } | 788 | } |
714 | 789 | ||
715 | static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, | 790 | static noinline void |
716 | struct extent_buffer *eb) | 791 | tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) |
717 | { | 792 | { |
718 | int i; | ||
719 | int ret; | ||
720 | u32 nritems; | ||
721 | |||
722 | if (tree_mod_dont_log(fs_info, eb)) | 793 | if (tree_mod_dont_log(fs_info, eb)) |
723 | return; | 794 | return; |
724 | 795 | ||
725 | nritems = btrfs_header_nritems(eb); | 796 | __tree_mod_log_free_eb(fs_info, eb); |
726 | for (i = nritems - 1; i >= 0; i--) { | 797 | |
727 | ret = tree_mod_log_insert_key(fs_info, eb, i, | 798 | tree_mod_log_write_unlock(fs_info); |
728 | MOD_LOG_KEY_REMOVE_WHILE_FREEING); | ||
729 | BUG_ON(ret < 0); | ||
730 | } | ||
731 | } | 799 | } |
732 | 800 | ||
733 | static inline void | 801 | static noinline void |
734 | tree_mod_log_set_root_pointer(struct btrfs_root *root, | 802 | tree_mod_log_set_root_pointer(struct btrfs_root *root, |
735 | struct extent_buffer *new_root_node) | 803 | struct extent_buffer *new_root_node) |
736 | { | 804 | { |
737 | int ret; | 805 | int ret; |
738 | tree_mod_log_free_eb(root->fs_info, root->node); | ||
739 | ret = tree_mod_log_insert_root(root->fs_info, root->node, | 806 | ret = tree_mod_log_insert_root(root->fs_info, root->node, |
740 | new_root_node, GFP_NOFS); | 807 | new_root_node, GFP_NOFS); |
741 | BUG_ON(ret < 0); | 808 | BUG_ON(ret < 0); |
@@ -1069,7 +1136,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, | |||
1069 | unsigned long p_size = sizeof(struct btrfs_key_ptr); | 1136 | unsigned long p_size = sizeof(struct btrfs_key_ptr); |
1070 | 1137 | ||
1071 | n = btrfs_header_nritems(eb); | 1138 | n = btrfs_header_nritems(eb); |
1072 | while (tm && tm->elem.seq >= time_seq) { | 1139 | while (tm && tm->seq >= time_seq) { |
1073 | /* | 1140 | /* |
1074 | * all the operations are recorded with the operator used for | 1141 | * all the operations are recorded with the operator used for |
1075 | * the modification. as we're going backwards, we do the | 1142 | * the modification. as we're going backwards, we do the |
@@ -2722,6 +2789,78 @@ done: | |||
2722 | } | 2789 | } |
2723 | 2790 | ||
2724 | /* | 2791 | /* |
2792 | * helper to use instead of search slot if no exact match is needed but | ||
2793 | * instead the next or previous item should be returned. | ||
2794 | * When find_higher is true, the next higher item is returned, the next lower | ||
2795 | * otherwise. | ||
2796 | * When return_any and find_higher are both true, and no higher item is found, | ||
2797 | * return the next lower instead. | ||
2798 | * When return_any is true and find_higher is false, and no lower item is found, | ||
2799 | * return the next higher instead. | ||
2800 | * It returns 0 if any item is found, 1 if none is found (tree empty), and | ||
2801 | * < 0 on error | ||
2802 | */ | ||
2803 | int btrfs_search_slot_for_read(struct btrfs_root *root, | ||
2804 | struct btrfs_key *key, struct btrfs_path *p, | ||
2805 | int find_higher, int return_any) | ||
2806 | { | ||
2807 | int ret; | ||
2808 | struct extent_buffer *leaf; | ||
2809 | |||
2810 | again: | ||
2811 | ret = btrfs_search_slot(NULL, root, key, p, 0, 0); | ||
2812 | if (ret <= 0) | ||
2813 | return ret; | ||
2814 | /* | ||
2815 | * a return value of 1 means the path is at the position where the | ||
2816 | * item should be inserted. Normally this is the next bigger item, | ||
2817 | * but in case the previous item is the last in a leaf, path points | ||
2818 | * to the first free slot in the previous leaf, i.e. at an invalid | ||
2819 | * item. | ||
2820 | */ | ||
2821 | leaf = p->nodes[0]; | ||
2822 | |||
2823 | if (find_higher) { | ||
2824 | if (p->slots[0] >= btrfs_header_nritems(leaf)) { | ||
2825 | ret = btrfs_next_leaf(root, p); | ||
2826 | if (ret <= 0) | ||
2827 | return ret; | ||
2828 | if (!return_any) | ||
2829 | return 1; | ||
2830 | /* | ||
2831 | * no higher item found, return the next | ||
2832 | * lower instead | ||
2833 | */ | ||
2834 | return_any = 0; | ||
2835 | find_higher = 0; | ||
2836 | btrfs_release_path(p); | ||
2837 | goto again; | ||
2838 | } | ||
2839 | } else { | ||
2840 | if (p->slots[0] >= btrfs_header_nritems(leaf)) { | ||
2841 | /* we're sitting on an invalid slot */ | ||
2842 | if (p->slots[0] == 0) { | ||
2843 | ret = btrfs_prev_leaf(root, p); | ||
2844 | if (ret <= 0) | ||
2845 | return ret; | ||
2846 | if (!return_any) | ||
2847 | return 1; | ||
2848 | /* | ||
2849 | * no lower item found, return the next | ||
2850 | * higher instead | ||
2851 | */ | ||
2852 | return_any = 0; | ||
2853 | find_higher = 1; | ||
2854 | btrfs_release_path(p); | ||
2855 | goto again; | ||
2856 | } | ||
2857 | --p->slots[0]; | ||
2858 | } | ||
2859 | } | ||
2860 | return 0; | ||
2861 | } | ||
2862 | |||
2863 | /* | ||
2725 | * adjust the pointers going up the tree, starting at level | 2864 | * adjust the pointers going up the tree, starting at level |
2726 | * making sure the right key of each node is points to 'key'. | 2865 | * making sure the right key of each node is points to 'key'. |
2727 | * This is used after shifting pointers to the left, so it stops | 2866 | * This is used after shifting pointers to the left, so it stops |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a0ee2f8e0566..00f9a50f986d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -91,6 +91,9 @@ struct btrfs_ordered_sum; | |||
91 | /* for storing balance parameters in the root tree */ | 91 | /* for storing balance parameters in the root tree */ |
92 | #define BTRFS_BALANCE_OBJECTID -4ULL | 92 | #define BTRFS_BALANCE_OBJECTID -4ULL |
93 | 93 | ||
94 | /* holds quota configuration and tracking */ | ||
95 | #define BTRFS_QUOTA_TREE_OBJECTID 8ULL | ||
96 | |||
94 | /* orhpan objectid for tracking unlinked/truncated files */ | 97 | /* orhpan objectid for tracking unlinked/truncated files */ |
95 | #define BTRFS_ORPHAN_OBJECTID -5ULL | 98 | #define BTRFS_ORPHAN_OBJECTID -5ULL |
96 | 99 | ||
@@ -883,6 +886,72 @@ struct btrfs_block_group_item { | |||
883 | __le64 flags; | 886 | __le64 flags; |
884 | } __attribute__ ((__packed__)); | 887 | } __attribute__ ((__packed__)); |
885 | 888 | ||
889 | /* | ||
890 | * is subvolume quota turned on? | ||
891 | */ | ||
892 | #define BTRFS_QGROUP_STATUS_FLAG_ON (1ULL << 0) | ||
893 | /* | ||
894 | * SCANNING is set during the initialization phase | ||
895 | */ | ||
896 | #define BTRFS_QGROUP_STATUS_FLAG_SCANNING (1ULL << 1) | ||
897 | /* | ||
898 | * Some qgroup entries are known to be out of date, | ||
899 | * either because the configuration has changed in a way that | ||
900 | * makes a rescan necessary, or because the fs has been mounted | ||
901 | * with a non-qgroup-aware version. | ||
902 | * Turning qouta off and on again makes it inconsistent, too. | ||
903 | */ | ||
904 | #define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT (1ULL << 2) | ||
905 | |||
906 | #define BTRFS_QGROUP_STATUS_VERSION 1 | ||
907 | |||
908 | struct btrfs_qgroup_status_item { | ||
909 | __le64 version; | ||
910 | /* | ||
911 | * the generation is updated during every commit. As older | ||
912 | * versions of btrfs are not aware of qgroups, it will be | ||
913 | * possible to detect inconsistencies by checking the | ||
914 | * generation on mount time | ||
915 | */ | ||
916 | __le64 generation; | ||
917 | |||
918 | /* flag definitions see above */ | ||
919 | __le64 flags; | ||
920 | |||
921 | /* | ||
922 | * only used during scanning to record the progress | ||
923 | * of the scan. It contains a logical address | ||
924 | */ | ||
925 | __le64 scan; | ||
926 | } __attribute__ ((__packed__)); | ||
927 | |||
928 | struct btrfs_qgroup_info_item { | ||
929 | __le64 generation; | ||
930 | __le64 rfer; | ||
931 | __le64 rfer_cmpr; | ||
932 | __le64 excl; | ||
933 | __le64 excl_cmpr; | ||
934 | } __attribute__ ((__packed__)); | ||
935 | |||
936 | /* flags definition for qgroup limits */ | ||
937 | #define BTRFS_QGROUP_LIMIT_MAX_RFER (1ULL << 0) | ||
938 | #define BTRFS_QGROUP_LIMIT_MAX_EXCL (1ULL << 1) | ||
939 | #define BTRFS_QGROUP_LIMIT_RSV_RFER (1ULL << 2) | ||
940 | #define BTRFS_QGROUP_LIMIT_RSV_EXCL (1ULL << 3) | ||
941 | #define BTRFS_QGROUP_LIMIT_RFER_CMPR (1ULL << 4) | ||
942 | #define BTRFS_QGROUP_LIMIT_EXCL_CMPR (1ULL << 5) | ||
943 | |||
944 | struct btrfs_qgroup_limit_item { | ||
945 | /* | ||
946 | * only updated when any of the other values change | ||
947 | */ | ||
948 | __le64 flags; | ||
949 | __le64 max_rfer; | ||
950 | __le64 max_excl; | ||
951 | __le64 rsv_rfer; | ||
952 | __le64 rsv_excl; | ||
953 | } __attribute__ ((__packed__)); | ||
954 | |||
886 | struct btrfs_space_info { | 955 | struct btrfs_space_info { |
887 | u64 flags; | 956 | u64 flags; |
888 | 957 | ||
@@ -1030,6 +1099,13 @@ struct btrfs_block_group_cache { | |||
1030 | struct list_head cluster_list; | 1099 | struct list_head cluster_list; |
1031 | }; | 1100 | }; |
1032 | 1101 | ||
1102 | /* delayed seq elem */ | ||
1103 | struct seq_list { | ||
1104 | struct list_head list; | ||
1105 | u64 seq; | ||
1106 | }; | ||
1107 | |||
1108 | /* fs_info */ | ||
1033 | struct reloc_control; | 1109 | struct reloc_control; |
1034 | struct btrfs_device; | 1110 | struct btrfs_device; |
1035 | struct btrfs_fs_devices; | 1111 | struct btrfs_fs_devices; |
@@ -1044,6 +1120,7 @@ struct btrfs_fs_info { | |||
1044 | struct btrfs_root *dev_root; | 1120 | struct btrfs_root *dev_root; |
1045 | struct btrfs_root *fs_root; | 1121 | struct btrfs_root *fs_root; |
1046 | struct btrfs_root *csum_root; | 1122 | struct btrfs_root *csum_root; |
1123 | struct btrfs_root *quota_root; | ||
1047 | 1124 | ||
1048 | /* the log root tree is a directory of all the other log roots */ | 1125 | /* the log root tree is a directory of all the other log roots */ |
1049 | struct btrfs_root *log_root_tree; | 1126 | struct btrfs_root *log_root_tree; |
@@ -1144,6 +1221,8 @@ struct btrfs_fs_info { | |||
1144 | spinlock_t tree_mod_seq_lock; | 1221 | spinlock_t tree_mod_seq_lock; |
1145 | atomic_t tree_mod_seq; | 1222 | atomic_t tree_mod_seq; |
1146 | struct list_head tree_mod_seq_list; | 1223 | struct list_head tree_mod_seq_list; |
1224 | struct seq_list tree_mod_seq_elem; | ||
1225 | wait_queue_head_t tree_mod_seq_wait; | ||
1147 | 1226 | ||
1148 | /* this protects tree_mod_log */ | 1227 | /* this protects tree_mod_log */ |
1149 | rwlock_t tree_mod_log_lock; | 1228 | rwlock_t tree_mod_log_lock; |
@@ -1298,6 +1377,29 @@ struct btrfs_fs_info { | |||
1298 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | 1377 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY |
1299 | u32 check_integrity_print_mask; | 1378 | u32 check_integrity_print_mask; |
1300 | #endif | 1379 | #endif |
1380 | /* | ||
1381 | * quota information | ||
1382 | */ | ||
1383 | unsigned int quota_enabled:1; | ||
1384 | |||
1385 | /* | ||
1386 | * quota_enabled only changes state after a commit. This holds the | ||
1387 | * next state. | ||
1388 | */ | ||
1389 | unsigned int pending_quota_state:1; | ||
1390 | |||
1391 | /* is qgroup tracking in a consistent state? */ | ||
1392 | u64 qgroup_flags; | ||
1393 | |||
1394 | /* holds configuration and tracking. Protected by qgroup_lock */ | ||
1395 | struct rb_root qgroup_tree; | ||
1396 | spinlock_t qgroup_lock; | ||
1397 | |||
1398 | /* list of dirty qgroups to be written at next commit */ | ||
1399 | struct list_head dirty_qgroups; | ||
1400 | |||
1401 | /* used by btrfs_qgroup_record_ref for an efficient tree traversal */ | ||
1402 | u64 qgroup_seq; | ||
1301 | 1403 | ||
1302 | /* filesystem state */ | 1404 | /* filesystem state */ |
1303 | u64 fs_state; | 1405 | u64 fs_state; |
@@ -1527,6 +1629,30 @@ struct btrfs_ioctl_defrag_range_args { | |||
1527 | #define BTRFS_DEV_ITEM_KEY 216 | 1629 | #define BTRFS_DEV_ITEM_KEY 216 |
1528 | #define BTRFS_CHUNK_ITEM_KEY 228 | 1630 | #define BTRFS_CHUNK_ITEM_KEY 228 |
1529 | 1631 | ||
1632 | /* | ||
1633 | * Records the overall state of the qgroups. | ||
1634 | * There's only one instance of this key present, | ||
1635 | * (0, BTRFS_QGROUP_STATUS_KEY, 0) | ||
1636 | */ | ||
1637 | #define BTRFS_QGROUP_STATUS_KEY 240 | ||
1638 | /* | ||
1639 | * Records the currently used space of the qgroup. | ||
1640 | * One key per qgroup, (0, BTRFS_QGROUP_INFO_KEY, qgroupid). | ||
1641 | */ | ||
1642 | #define BTRFS_QGROUP_INFO_KEY 242 | ||
1643 | /* | ||
1644 | * Contains the user configured limits for the qgroup. | ||
1645 | * One key per qgroup, (0, BTRFS_QGROUP_LIMIT_KEY, qgroupid). | ||
1646 | */ | ||
1647 | #define BTRFS_QGROUP_LIMIT_KEY 244 | ||
1648 | /* | ||
1649 | * Records the child-parent relationship of qgroups. For | ||
1650 | * each relation, 2 keys are present: | ||
1651 | * (childid, BTRFS_QGROUP_RELATION_KEY, parentid) | ||
1652 | * (parentid, BTRFS_QGROUP_RELATION_KEY, childid) | ||
1653 | */ | ||
1654 | #define BTRFS_QGROUP_RELATION_KEY 246 | ||
1655 | |||
1530 | #define BTRFS_BALANCE_ITEM_KEY 248 | 1656 | #define BTRFS_BALANCE_ITEM_KEY 248 |
1531 | 1657 | ||
1532 | /* | 1658 | /* |
@@ -2508,6 +2634,49 @@ static inline void btrfs_set_dev_stats_value(struct extent_buffer *eb, | |||
2508 | sizeof(val)); | 2634 | sizeof(val)); |
2509 | } | 2635 | } |
2510 | 2636 | ||
2637 | /* btrfs_qgroup_status_item */ | ||
2638 | BTRFS_SETGET_FUNCS(qgroup_status_generation, struct btrfs_qgroup_status_item, | ||
2639 | generation, 64); | ||
2640 | BTRFS_SETGET_FUNCS(qgroup_status_version, struct btrfs_qgroup_status_item, | ||
2641 | version, 64); | ||
2642 | BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item, | ||
2643 | flags, 64); | ||
2644 | BTRFS_SETGET_FUNCS(qgroup_status_scan, struct btrfs_qgroup_status_item, | ||
2645 | scan, 64); | ||
2646 | |||
2647 | /* btrfs_qgroup_info_item */ | ||
2648 | BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item, | ||
2649 | generation, 64); | ||
2650 | BTRFS_SETGET_FUNCS(qgroup_info_rfer, struct btrfs_qgroup_info_item, rfer, 64); | ||
2651 | BTRFS_SETGET_FUNCS(qgroup_info_rfer_cmpr, struct btrfs_qgroup_info_item, | ||
2652 | rfer_cmpr, 64); | ||
2653 | BTRFS_SETGET_FUNCS(qgroup_info_excl, struct btrfs_qgroup_info_item, excl, 64); | ||
2654 | BTRFS_SETGET_FUNCS(qgroup_info_excl_cmpr, struct btrfs_qgroup_info_item, | ||
2655 | excl_cmpr, 64); | ||
2656 | |||
2657 | BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_generation, | ||
2658 | struct btrfs_qgroup_info_item, generation, 64); | ||
2659 | BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_rfer, struct btrfs_qgroup_info_item, | ||
2660 | rfer, 64); | ||
2661 | BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_rfer_cmpr, | ||
2662 | struct btrfs_qgroup_info_item, rfer_cmpr, 64); | ||
2663 | BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_excl, struct btrfs_qgroup_info_item, | ||
2664 | excl, 64); | ||
2665 | BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_excl_cmpr, | ||
2666 | struct btrfs_qgroup_info_item, excl_cmpr, 64); | ||
2667 | |||
2668 | /* btrfs_qgroup_limit_item */ | ||
2669 | BTRFS_SETGET_FUNCS(qgroup_limit_flags, struct btrfs_qgroup_limit_item, | ||
2670 | flags, 64); | ||
2671 | BTRFS_SETGET_FUNCS(qgroup_limit_max_rfer, struct btrfs_qgroup_limit_item, | ||
2672 | max_rfer, 64); | ||
2673 | BTRFS_SETGET_FUNCS(qgroup_limit_max_excl, struct btrfs_qgroup_limit_item, | ||
2674 | max_excl, 64); | ||
2675 | BTRFS_SETGET_FUNCS(qgroup_limit_rsv_rfer, struct btrfs_qgroup_limit_item, | ||
2676 | rsv_rfer, 64); | ||
2677 | BTRFS_SETGET_FUNCS(qgroup_limit_rsv_excl, struct btrfs_qgroup_limit_item, | ||
2678 | rsv_excl, 64); | ||
2679 | |||
2511 | static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) | 2680 | static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) |
2512 | { | 2681 | { |
2513 | return sb->s_fs_info; | 2682 | return sb->s_fs_info; |
@@ -2703,6 +2872,8 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, | |||
2703 | int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range); | 2872 | int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range); |
2704 | 2873 | ||
2705 | int btrfs_init_space_info(struct btrfs_fs_info *fs_info); | 2874 | int btrfs_init_space_info(struct btrfs_fs_info *fs_info); |
2875 | int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, | ||
2876 | struct btrfs_fs_info *fs_info); | ||
2706 | /* ctree.c */ | 2877 | /* ctree.c */ |
2707 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 2878 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
2708 | int level, int *slot); | 2879 | int level, int *slot); |
@@ -2753,6 +2924,9 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2753 | ins_len, int cow); | 2924 | ins_len, int cow); |
2754 | int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, | 2925 | int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, |
2755 | struct btrfs_path *p, u64 time_seq); | 2926 | struct btrfs_path *p, u64 time_seq); |
2927 | int btrfs_search_slot_for_read(struct btrfs_root *root, | ||
2928 | struct btrfs_key *key, struct btrfs_path *p, | ||
2929 | int find_higher, int return_any); | ||
2756 | int btrfs_realloc_node(struct btrfs_trans_handle *trans, | 2930 | int btrfs_realloc_node(struct btrfs_trans_handle *trans, |
2757 | struct btrfs_root *root, struct extent_buffer *parent, | 2931 | struct btrfs_root *root, struct extent_buffer *parent, |
2758 | int start_slot, int cache_only, u64 *last_ret, | 2932 | int start_slot, int cache_only, u64 *last_ret, |
@@ -2835,11 +3009,22 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info) | |||
2835 | kfree(fs_info->chunk_root); | 3009 | kfree(fs_info->chunk_root); |
2836 | kfree(fs_info->dev_root); | 3010 | kfree(fs_info->dev_root); |
2837 | kfree(fs_info->csum_root); | 3011 | kfree(fs_info->csum_root); |
3012 | kfree(fs_info->quota_root); | ||
2838 | kfree(fs_info->super_copy); | 3013 | kfree(fs_info->super_copy); |
2839 | kfree(fs_info->super_for_commit); | 3014 | kfree(fs_info->super_for_commit); |
2840 | kfree(fs_info); | 3015 | kfree(fs_info); |
2841 | } | 3016 | } |
2842 | 3017 | ||
3018 | /* tree mod log functions from ctree.c */ | ||
3019 | u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
3020 | struct seq_list *elem); | ||
3021 | void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
3022 | struct seq_list *elem); | ||
3023 | static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info) | ||
3024 | { | ||
3025 | return atomic_inc_return(&fs_info->tree_mod_seq); | ||
3026 | } | ||
3027 | |||
2843 | /* root-item.c */ | 3028 | /* root-item.c */ |
2844 | int btrfs_find_root_ref(struct btrfs_root *tree_root, | 3029 | int btrfs_find_root_ref(struct btrfs_root *tree_root, |
2845 | struct btrfs_path *path, | 3030 | struct btrfs_path *path, |
@@ -3198,17 +3383,49 @@ void btrfs_reada_detach(void *handle); | |||
3198 | int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, | 3383 | int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, |
3199 | u64 start, int err); | 3384 | u64 start, int err); |
3200 | 3385 | ||
3201 | /* delayed seq elem */ | 3386 | /* qgroup.c */ |
3202 | struct seq_list { | 3387 | struct qgroup_update { |
3203 | struct list_head list; | 3388 | struct list_head list; |
3204 | u64 seq; | 3389 | struct btrfs_delayed_ref_node *node; |
3205 | u32 flags; | 3390 | struct btrfs_delayed_extent_op *extent_op; |
3206 | }; | 3391 | }; |
3207 | 3392 | ||
3208 | void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, | 3393 | int btrfs_quota_enable(struct btrfs_trans_handle *trans, |
3209 | struct seq_list *elem); | 3394 | struct btrfs_fs_info *fs_info); |
3210 | void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, | 3395 | int btrfs_quota_disable(struct btrfs_trans_handle *trans, |
3211 | struct seq_list *elem); | 3396 | struct btrfs_fs_info *fs_info); |
3397 | int btrfs_quota_rescan(struct btrfs_fs_info *fs_info); | ||
3398 | int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, | ||
3399 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); | ||
3400 | int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, | ||
3401 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); | ||
3402 | int btrfs_create_qgroup(struct btrfs_trans_handle *trans, | ||
3403 | struct btrfs_fs_info *fs_info, u64 qgroupid, | ||
3404 | char *name); | ||
3405 | int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, | ||
3406 | struct btrfs_fs_info *fs_info, u64 qgroupid); | ||
3407 | int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, | ||
3408 | struct btrfs_fs_info *fs_info, u64 qgroupid, | ||
3409 | struct btrfs_qgroup_limit *limit); | ||
3410 | int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info); | ||
3411 | void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info); | ||
3412 | struct btrfs_delayed_extent_op; | ||
3413 | int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, | ||
3414 | struct btrfs_delayed_ref_node *node, | ||
3415 | struct btrfs_delayed_extent_op *extent_op); | ||
3416 | int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | ||
3417 | struct btrfs_fs_info *fs_info, | ||
3418 | struct btrfs_delayed_ref_node *node, | ||
3419 | struct btrfs_delayed_extent_op *extent_op); | ||
3420 | int btrfs_run_qgroups(struct btrfs_trans_handle *trans, | ||
3421 | struct btrfs_fs_info *fs_info); | ||
3422 | int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, | ||
3423 | struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, | ||
3424 | struct btrfs_qgroup_inherit *inherit); | ||
3425 | int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes); | ||
3426 | void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes); | ||
3427 | |||
3428 | void assert_qgroups_uptodate(struct btrfs_trans_handle *trans); | ||
3212 | 3429 | ||
3213 | static inline int is_fstree(u64 rootid) | 3430 | static inline int is_fstree(u64 rootid) |
3214 | { | 3431 | { |
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 13ae7b04790e..da7419ed01bb 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
@@ -233,22 +233,26 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, | |||
233 | return 0; | 233 | return 0; |
234 | } | 234 | } |
235 | 235 | ||
236 | int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, | 236 | int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, |
237 | struct btrfs_delayed_ref_root *delayed_refs, | ||
237 | u64 seq) | 238 | u64 seq) |
238 | { | 239 | { |
239 | struct seq_list *elem; | 240 | struct seq_list *elem; |
240 | 241 | int ret = 0; | |
241 | assert_spin_locked(&delayed_refs->lock); | 242 | |
242 | if (list_empty(&delayed_refs->seq_head)) | 243 | spin_lock(&fs_info->tree_mod_seq_lock); |
243 | return 0; | 244 | if (!list_empty(&fs_info->tree_mod_seq_list)) { |
244 | 245 | elem = list_first_entry(&fs_info->tree_mod_seq_list, | |
245 | elem = list_first_entry(&delayed_refs->seq_head, struct seq_list, list); | 246 | struct seq_list, list); |
246 | if (seq >= elem->seq) { | 247 | if (seq >= elem->seq) { |
247 | pr_debug("holding back delayed_ref %llu, lowest is %llu (%p)\n", | 248 | pr_debug("holding back delayed_ref %llu, lowest is " |
248 | seq, elem->seq, delayed_refs); | 249 | "%llu (%p)\n", seq, elem->seq, delayed_refs); |
249 | return 1; | 250 | ret = 1; |
251 | } | ||
250 | } | 252 | } |
251 | return 0; | 253 | |
254 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
255 | return ret; | ||
252 | } | 256 | } |
253 | 257 | ||
254 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, | 258 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, |
@@ -525,8 +529,8 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
525 | ref->is_head = 0; | 529 | ref->is_head = 0; |
526 | ref->in_tree = 1; | 530 | ref->in_tree = 1; |
527 | 531 | ||
528 | if (is_fstree(ref_root)) | 532 | if (need_ref_seq(for_cow, ref_root)) |
529 | seq = inc_delayed_seq(delayed_refs); | 533 | seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem); |
530 | ref->seq = seq; | 534 | ref->seq = seq; |
531 | 535 | ||
532 | full_ref = btrfs_delayed_node_to_tree_ref(ref); | 536 | full_ref = btrfs_delayed_node_to_tree_ref(ref); |
@@ -584,8 +588,8 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
584 | ref->is_head = 0; | 588 | ref->is_head = 0; |
585 | ref->in_tree = 1; | 589 | ref->in_tree = 1; |
586 | 590 | ||
587 | if (is_fstree(ref_root)) | 591 | if (need_ref_seq(for_cow, ref_root)) |
588 | seq = inc_delayed_seq(delayed_refs); | 592 | seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem); |
589 | ref->seq = seq; | 593 | ref->seq = seq; |
590 | 594 | ||
591 | full_ref = btrfs_delayed_node_to_data_ref(ref); | 595 | full_ref = btrfs_delayed_node_to_data_ref(ref); |
@@ -658,10 +662,12 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
658 | add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, | 662 | add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, |
659 | num_bytes, parent, ref_root, level, action, | 663 | num_bytes, parent, ref_root, level, action, |
660 | for_cow); | 664 | for_cow); |
661 | if (!is_fstree(ref_root) && | 665 | if (!need_ref_seq(for_cow, ref_root) && |
662 | waitqueue_active(&delayed_refs->seq_wait)) | 666 | waitqueue_active(&fs_info->tree_mod_seq_wait)) |
663 | wake_up(&delayed_refs->seq_wait); | 667 | wake_up(&fs_info->tree_mod_seq_wait); |
664 | spin_unlock(&delayed_refs->lock); | 668 | spin_unlock(&delayed_refs->lock); |
669 | if (need_ref_seq(for_cow, ref_root)) | ||
670 | btrfs_qgroup_record_ref(trans, &ref->node, extent_op); | ||
665 | 671 | ||
666 | return 0; | 672 | return 0; |
667 | } | 673 | } |
@@ -707,10 +713,12 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
707 | add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, | 713 | add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, |
708 | num_bytes, parent, ref_root, owner, offset, | 714 | num_bytes, parent, ref_root, owner, offset, |
709 | action, for_cow); | 715 | action, for_cow); |
710 | if (!is_fstree(ref_root) && | 716 | if (!need_ref_seq(for_cow, ref_root) && |
711 | waitqueue_active(&delayed_refs->seq_wait)) | 717 | waitqueue_active(&fs_info->tree_mod_seq_wait)) |
712 | wake_up(&delayed_refs->seq_wait); | 718 | wake_up(&fs_info->tree_mod_seq_wait); |
713 | spin_unlock(&delayed_refs->lock); | 719 | spin_unlock(&delayed_refs->lock); |
720 | if (need_ref_seq(for_cow, ref_root)) | ||
721 | btrfs_qgroup_record_ref(trans, &ref->node, extent_op); | ||
714 | 722 | ||
715 | return 0; | 723 | return 0; |
716 | } | 724 | } |
@@ -736,8 +744,8 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, | |||
736 | num_bytes, BTRFS_UPDATE_DELAYED_HEAD, | 744 | num_bytes, BTRFS_UPDATE_DELAYED_HEAD, |
737 | extent_op->is_data); | 745 | extent_op->is_data); |
738 | 746 | ||
739 | if (waitqueue_active(&delayed_refs->seq_wait)) | 747 | if (waitqueue_active(&fs_info->tree_mod_seq_wait)) |
740 | wake_up(&delayed_refs->seq_wait); | 748 | wake_up(&fs_info->tree_mod_seq_wait); |
741 | spin_unlock(&delayed_refs->lock); | 749 | spin_unlock(&delayed_refs->lock); |
742 | return 0; | 750 | return 0; |
743 | } | 751 | } |
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 413927fb9957..0d7c90c366b6 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h | |||
@@ -139,26 +139,6 @@ struct btrfs_delayed_ref_root { | |||
139 | int flushing; | 139 | int flushing; |
140 | 140 | ||
141 | u64 run_delayed_start; | 141 | u64 run_delayed_start; |
142 | |||
143 | /* | ||
144 | * seq number of delayed refs. We need to know if a backref was being | ||
145 | * added before the currently processed ref or afterwards. | ||
146 | */ | ||
147 | u64 seq; | ||
148 | |||
149 | /* | ||
150 | * seq_list holds a list of all seq numbers that are currently being | ||
151 | * added to the list. While walking backrefs (btrfs_find_all_roots, | ||
152 | * qgroups), which might take some time, no newer ref must be processed, | ||
153 | * as it might influence the outcome of the walk. | ||
154 | */ | ||
155 | struct list_head seq_head; | ||
156 | |||
157 | /* | ||
158 | * when the only refs we have in the list must not be processed, we want | ||
159 | * to wait for more refs to show up or for the end of backref walking. | ||
160 | */ | ||
161 | wait_queue_head_t seq_wait; | ||
162 | }; | 142 | }; |
163 | 143 | ||
164 | static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) | 144 | static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) |
@@ -195,34 +175,28 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, | |||
195 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, | 175 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, |
196 | struct list_head *cluster, u64 search_start); | 176 | struct list_head *cluster, u64 search_start); |
197 | 177 | ||
198 | static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs) | 178 | int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, |
199 | { | 179 | struct btrfs_delayed_ref_root *delayed_refs, |
200 | assert_spin_locked(&delayed_refs->lock); | 180 | u64 seq); |
201 | ++delayed_refs->seq; | ||
202 | return delayed_refs->seq; | ||
203 | } | ||
204 | 181 | ||
205 | static inline void | 182 | /* |
206 | btrfs_get_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, | 183 | * delayed refs with a ref_seq > 0 must be held back during backref walking. |
207 | struct seq_list *elem) | 184 | * this only applies to items in one of the fs-trees. for_cow items never need |
185 | * to be held back, so they won't get a ref_seq number. | ||
186 | */ | ||
187 | static inline int need_ref_seq(int for_cow, u64 rootid) | ||
208 | { | 188 | { |
209 | assert_spin_locked(&delayed_refs->lock); | 189 | if (for_cow) |
210 | elem->seq = delayed_refs->seq; | 190 | return 0; |
211 | list_add_tail(&elem->list, &delayed_refs->seq_head); | ||
212 | } | ||
213 | 191 | ||
214 | static inline void | 192 | if (rootid == BTRFS_FS_TREE_OBJECTID) |
215 | btrfs_put_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, | 193 | return 1; |
216 | struct seq_list *elem) | ||
217 | { | ||
218 | spin_lock(&delayed_refs->lock); | ||
219 | list_del(&elem->list); | ||
220 | wake_up(&delayed_refs->seq_wait); | ||
221 | spin_unlock(&delayed_refs->lock); | ||
222 | } | ||
223 | 194 | ||
224 | int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, | 195 | if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID) |
225 | u64 seq); | 196 | return 1; |
197 | |||
198 | return 0; | ||
199 | } | ||
226 | 200 | ||
227 | /* | 201 | /* |
228 | * a node might live in a head or a regular ref, this lets you | 202 | * a node might live in a head or a regular ref, this lets you |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1a4a2a975926..05f4fb6e0607 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -1225,6 +1225,82 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info) | |||
1225 | return root; | 1225 | return root; |
1226 | } | 1226 | } |
1227 | 1227 | ||
1228 | struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, | ||
1229 | struct btrfs_fs_info *fs_info, | ||
1230 | u64 objectid) | ||
1231 | { | ||
1232 | struct extent_buffer *leaf; | ||
1233 | struct btrfs_root *tree_root = fs_info->tree_root; | ||
1234 | struct btrfs_root *root; | ||
1235 | struct btrfs_key key; | ||
1236 | int ret = 0; | ||
1237 | u64 bytenr; | ||
1238 | |||
1239 | root = btrfs_alloc_root(fs_info); | ||
1240 | if (!root) | ||
1241 | return ERR_PTR(-ENOMEM); | ||
1242 | |||
1243 | __setup_root(tree_root->nodesize, tree_root->leafsize, | ||
1244 | tree_root->sectorsize, tree_root->stripesize, | ||
1245 | root, fs_info, objectid); | ||
1246 | root->root_key.objectid = objectid; | ||
1247 | root->root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
1248 | root->root_key.offset = 0; | ||
1249 | |||
1250 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, | ||
1251 | 0, objectid, NULL, 0, 0, 0); | ||
1252 | if (IS_ERR(leaf)) { | ||
1253 | ret = PTR_ERR(leaf); | ||
1254 | goto fail; | ||
1255 | } | ||
1256 | |||
1257 | bytenr = leaf->start; | ||
1258 | memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header)); | ||
1259 | btrfs_set_header_bytenr(leaf, leaf->start); | ||
1260 | btrfs_set_header_generation(leaf, trans->transid); | ||
1261 | btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); | ||
1262 | btrfs_set_header_owner(leaf, objectid); | ||
1263 | root->node = leaf; | ||
1264 | |||
1265 | write_extent_buffer(leaf, fs_info->fsid, | ||
1266 | (unsigned long)btrfs_header_fsid(leaf), | ||
1267 | BTRFS_FSID_SIZE); | ||
1268 | write_extent_buffer(leaf, fs_info->chunk_tree_uuid, | ||
1269 | (unsigned long)btrfs_header_chunk_tree_uuid(leaf), | ||
1270 | BTRFS_UUID_SIZE); | ||
1271 | btrfs_mark_buffer_dirty(leaf); | ||
1272 | |||
1273 | root->commit_root = btrfs_root_node(root); | ||
1274 | root->track_dirty = 1; | ||
1275 | |||
1276 | |||
1277 | root->root_item.flags = 0; | ||
1278 | root->root_item.byte_limit = 0; | ||
1279 | btrfs_set_root_bytenr(&root->root_item, leaf->start); | ||
1280 | btrfs_set_root_generation(&root->root_item, trans->transid); | ||
1281 | btrfs_set_root_level(&root->root_item, 0); | ||
1282 | btrfs_set_root_refs(&root->root_item, 1); | ||
1283 | btrfs_set_root_used(&root->root_item, leaf->len); | ||
1284 | btrfs_set_root_last_snapshot(&root->root_item, 0); | ||
1285 | btrfs_set_root_dirid(&root->root_item, 0); | ||
1286 | root->root_item.drop_level = 0; | ||
1287 | |||
1288 | key.objectid = objectid; | ||
1289 | key.type = BTRFS_ROOT_ITEM_KEY; | ||
1290 | key.offset = 0; | ||
1291 | ret = btrfs_insert_root(trans, tree_root, &key, &root->root_item); | ||
1292 | if (ret) | ||
1293 | goto fail; | ||
1294 | |||
1295 | btrfs_tree_unlock(leaf); | ||
1296 | |||
1297 | fail: | ||
1298 | if (ret) | ||
1299 | return ERR_PTR(ret); | ||
1300 | |||
1301 | return root; | ||
1302 | } | ||
1303 | |||
1228 | static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, | 1304 | static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, |
1229 | struct btrfs_fs_info *fs_info) | 1305 | struct btrfs_fs_info *fs_info) |
1230 | { | 1306 | { |
@@ -1396,6 +1472,9 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, | |||
1396 | return fs_info->dev_root; | 1472 | return fs_info->dev_root; |
1397 | if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) | 1473 | if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) |
1398 | return fs_info->csum_root; | 1474 | return fs_info->csum_root; |
1475 | if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID) | ||
1476 | return fs_info->quota_root ? fs_info->quota_root : | ||
1477 | ERR_PTR(-ENOENT); | ||
1399 | again: | 1478 | again: |
1400 | spin_lock(&fs_info->fs_roots_radix_lock); | 1479 | spin_lock(&fs_info->fs_roots_radix_lock); |
1401 | root = radix_tree_lookup(&fs_info->fs_roots_radix, | 1480 | root = radix_tree_lookup(&fs_info->fs_roots_radix, |
@@ -1823,6 +1902,10 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) | |||
1823 | free_extent_buffer(info->extent_root->commit_root); | 1902 | free_extent_buffer(info->extent_root->commit_root); |
1824 | free_extent_buffer(info->csum_root->node); | 1903 | free_extent_buffer(info->csum_root->node); |
1825 | free_extent_buffer(info->csum_root->commit_root); | 1904 | free_extent_buffer(info->csum_root->commit_root); |
1905 | if (info->quota_root) { | ||
1906 | free_extent_buffer(info->quota_root->node); | ||
1907 | free_extent_buffer(info->quota_root->commit_root); | ||
1908 | } | ||
1826 | 1909 | ||
1827 | info->tree_root->node = NULL; | 1910 | info->tree_root->node = NULL; |
1828 | info->tree_root->commit_root = NULL; | 1911 | info->tree_root->commit_root = NULL; |
@@ -1832,6 +1915,10 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) | |||
1832 | info->extent_root->commit_root = NULL; | 1915 | info->extent_root->commit_root = NULL; |
1833 | info->csum_root->node = NULL; | 1916 | info->csum_root->node = NULL; |
1834 | info->csum_root->commit_root = NULL; | 1917 | info->csum_root->commit_root = NULL; |
1918 | if (info->quota_root) { | ||
1919 | info->quota_root->node = NULL; | ||
1920 | info->quota_root->commit_root = NULL; | ||
1921 | } | ||
1835 | 1922 | ||
1836 | if (chunk_root) { | 1923 | if (chunk_root) { |
1837 | free_extent_buffer(info->chunk_root->node); | 1924 | free_extent_buffer(info->chunk_root->node); |
@@ -1862,6 +1949,7 @@ int open_ctree(struct super_block *sb, | |||
1862 | struct btrfs_root *csum_root; | 1949 | struct btrfs_root *csum_root; |
1863 | struct btrfs_root *chunk_root; | 1950 | struct btrfs_root *chunk_root; |
1864 | struct btrfs_root *dev_root; | 1951 | struct btrfs_root *dev_root; |
1952 | struct btrfs_root *quota_root; | ||
1865 | struct btrfs_root *log_tree_root; | 1953 | struct btrfs_root *log_tree_root; |
1866 | int ret; | 1954 | int ret; |
1867 | int err = -EINVAL; | 1955 | int err = -EINVAL; |
@@ -1873,9 +1961,10 @@ int open_ctree(struct super_block *sb, | |||
1873 | csum_root = fs_info->csum_root = btrfs_alloc_root(fs_info); | 1961 | csum_root = fs_info->csum_root = btrfs_alloc_root(fs_info); |
1874 | chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); | 1962 | chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); |
1875 | dev_root = fs_info->dev_root = btrfs_alloc_root(fs_info); | 1963 | dev_root = fs_info->dev_root = btrfs_alloc_root(fs_info); |
1964 | quota_root = fs_info->quota_root = btrfs_alloc_root(fs_info); | ||
1876 | 1965 | ||
1877 | if (!tree_root || !extent_root || !csum_root || | 1966 | if (!tree_root || !extent_root || !csum_root || |
1878 | !chunk_root || !dev_root) { | 1967 | !chunk_root || !dev_root || !quota_root) { |
1879 | err = -ENOMEM; | 1968 | err = -ENOMEM; |
1880 | goto fail; | 1969 | goto fail; |
1881 | } | 1970 | } |
@@ -1944,6 +2033,8 @@ int open_ctree(struct super_block *sb, | |||
1944 | fs_info->free_chunk_space = 0; | 2033 | fs_info->free_chunk_space = 0; |
1945 | fs_info->tree_mod_log = RB_ROOT; | 2034 | fs_info->tree_mod_log = RB_ROOT; |
1946 | 2035 | ||
2036 | init_waitqueue_head(&fs_info->tree_mod_seq_wait); | ||
2037 | |||
1947 | /* readahead state */ | 2038 | /* readahead state */ |
1948 | INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); | 2039 | INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); |
1949 | spin_lock_init(&fs_info->reada_lock); | 2040 | spin_lock_init(&fs_info->reada_lock); |
@@ -2032,6 +2123,13 @@ int open_ctree(struct super_block *sb, | |||
2032 | init_rwsem(&fs_info->cleanup_work_sem); | 2123 | init_rwsem(&fs_info->cleanup_work_sem); |
2033 | init_rwsem(&fs_info->subvol_sem); | 2124 | init_rwsem(&fs_info->subvol_sem); |
2034 | 2125 | ||
2126 | spin_lock_init(&fs_info->qgroup_lock); | ||
2127 | fs_info->qgroup_tree = RB_ROOT; | ||
2128 | INIT_LIST_HEAD(&fs_info->dirty_qgroups); | ||
2129 | fs_info->qgroup_seq = 1; | ||
2130 | fs_info->quota_enabled = 0; | ||
2131 | fs_info->pending_quota_state = 0; | ||
2132 | |||
2035 | btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); | 2133 | btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); |
2036 | btrfs_init_free_cluster(&fs_info->data_alloc_cluster); | 2134 | btrfs_init_free_cluster(&fs_info->data_alloc_cluster); |
2037 | 2135 | ||
@@ -2356,6 +2454,17 @@ retry_root_backup: | |||
2356 | goto recovery_tree_root; | 2454 | goto recovery_tree_root; |
2357 | csum_root->track_dirty = 1; | 2455 | csum_root->track_dirty = 1; |
2358 | 2456 | ||
2457 | ret = find_and_setup_root(tree_root, fs_info, | ||
2458 | BTRFS_QUOTA_TREE_OBJECTID, quota_root); | ||
2459 | if (ret) { | ||
2460 | kfree(quota_root); | ||
2461 | quota_root = fs_info->quota_root = NULL; | ||
2462 | } else { | ||
2463 | quota_root->track_dirty = 1; | ||
2464 | fs_info->quota_enabled = 1; | ||
2465 | fs_info->pending_quota_state = 1; | ||
2466 | } | ||
2467 | |||
2359 | fs_info->generation = generation; | 2468 | fs_info->generation = generation; |
2360 | fs_info->last_trans_committed = generation; | 2469 | fs_info->last_trans_committed = generation; |
2361 | 2470 | ||
@@ -2415,6 +2524,9 @@ retry_root_backup: | |||
2415 | " integrity check module %s\n", sb->s_id); | 2524 | " integrity check module %s\n", sb->s_id); |
2416 | } | 2525 | } |
2417 | #endif | 2526 | #endif |
2527 | ret = btrfs_read_qgroup_config(fs_info); | ||
2528 | if (ret) | ||
2529 | goto fail_trans_kthread; | ||
2418 | 2530 | ||
2419 | /* do not make disk changes in broken FS */ | 2531 | /* do not make disk changes in broken FS */ |
2420 | if (btrfs_super_log_root(disk_super) != 0 && | 2532 | if (btrfs_super_log_root(disk_super) != 0 && |
@@ -2425,7 +2537,7 @@ retry_root_backup: | |||
2425 | printk(KERN_WARNING "Btrfs log replay required " | 2537 | printk(KERN_WARNING "Btrfs log replay required " |
2426 | "on RO media\n"); | 2538 | "on RO media\n"); |
2427 | err = -EIO; | 2539 | err = -EIO; |
2428 | goto fail_trans_kthread; | 2540 | goto fail_qgroup; |
2429 | } | 2541 | } |
2430 | blocksize = | 2542 | blocksize = |
2431 | btrfs_level_size(tree_root, | 2543 | btrfs_level_size(tree_root, |
@@ -2434,7 +2546,7 @@ retry_root_backup: | |||
2434 | log_tree_root = btrfs_alloc_root(fs_info); | 2546 | log_tree_root = btrfs_alloc_root(fs_info); |
2435 | if (!log_tree_root) { | 2547 | if (!log_tree_root) { |
2436 | err = -ENOMEM; | 2548 | err = -ENOMEM; |
2437 | goto fail_trans_kthread; | 2549 | goto fail_qgroup; |
2438 | } | 2550 | } |
2439 | 2551 | ||
2440 | __setup_root(nodesize, leafsize, sectorsize, stripesize, | 2552 | __setup_root(nodesize, leafsize, sectorsize, stripesize, |
@@ -2474,7 +2586,7 @@ retry_root_backup: | |||
2474 | printk(KERN_WARNING | 2586 | printk(KERN_WARNING |
2475 | "btrfs: failed to recover relocation\n"); | 2587 | "btrfs: failed to recover relocation\n"); |
2476 | err = -EINVAL; | 2588 | err = -EINVAL; |
2477 | goto fail_trans_kthread; | 2589 | goto fail_qgroup; |
2478 | } | 2590 | } |
2479 | } | 2591 | } |
2480 | 2592 | ||
@@ -2484,10 +2596,10 @@ retry_root_backup: | |||
2484 | 2596 | ||
2485 | fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); | 2597 | fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); |
2486 | if (!fs_info->fs_root) | 2598 | if (!fs_info->fs_root) |
2487 | goto fail_trans_kthread; | 2599 | goto fail_qgroup; |
2488 | if (IS_ERR(fs_info->fs_root)) { | 2600 | if (IS_ERR(fs_info->fs_root)) { |
2489 | err = PTR_ERR(fs_info->fs_root); | 2601 | err = PTR_ERR(fs_info->fs_root); |
2490 | goto fail_trans_kthread; | 2602 | goto fail_qgroup; |
2491 | } | 2603 | } |
2492 | 2604 | ||
2493 | if (sb->s_flags & MS_RDONLY) | 2605 | if (sb->s_flags & MS_RDONLY) |
@@ -2511,6 +2623,8 @@ retry_root_backup: | |||
2511 | 2623 | ||
2512 | return 0; | 2624 | return 0; |
2513 | 2625 | ||
2626 | fail_qgroup: | ||
2627 | btrfs_free_qgroup_config(fs_info); | ||
2514 | fail_trans_kthread: | 2628 | fail_trans_kthread: |
2515 | kthread_stop(fs_info->transaction_kthread); | 2629 | kthread_stop(fs_info->transaction_kthread); |
2516 | fail_cleaner: | 2630 | fail_cleaner: |
@@ -3109,6 +3223,8 @@ int close_ctree(struct btrfs_root *root) | |||
3109 | fs_info->closing = 2; | 3223 | fs_info->closing = 2; |
3110 | smp_mb(); | 3224 | smp_mb(); |
3111 | 3225 | ||
3226 | btrfs_free_qgroup_config(root->fs_info); | ||
3227 | |||
3112 | if (fs_info->delalloc_bytes) { | 3228 | if (fs_info->delalloc_bytes) { |
3113 | printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", | 3229 | printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", |
3114 | (unsigned long long)fs_info->delalloc_bytes); | 3230 | (unsigned long long)fs_info->delalloc_bytes); |
@@ -3128,6 +3244,10 @@ int close_ctree(struct btrfs_root *root) | |||
3128 | free_extent_buffer(fs_info->dev_root->commit_root); | 3244 | free_extent_buffer(fs_info->dev_root->commit_root); |
3129 | free_extent_buffer(fs_info->csum_root->node); | 3245 | free_extent_buffer(fs_info->csum_root->node); |
3130 | free_extent_buffer(fs_info->csum_root->commit_root); | 3246 | free_extent_buffer(fs_info->csum_root->commit_root); |
3247 | if (fs_info->quota_root) { | ||
3248 | free_extent_buffer(fs_info->quota_root->node); | ||
3249 | free_extent_buffer(fs_info->quota_root->commit_root); | ||
3250 | } | ||
3131 | 3251 | ||
3132 | btrfs_free_block_groups(fs_info); | 3252 | btrfs_free_block_groups(fs_info); |
3133 | 3253 | ||
@@ -3258,7 +3378,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) | |||
3258 | return btree_read_extent_buffer_pages(root, buf, 0, parent_transid); | 3378 | return btree_read_extent_buffer_pages(root, buf, 0, parent_transid); |
3259 | } | 3379 | } |
3260 | 3380 | ||
3261 | static int btree_lock_page_hook(struct page *page, void *data, | 3381 | int btree_lock_page_hook(struct page *page, void *data, |
3262 | void (*flush_fn)(void *)) | 3382 | void (*flush_fn)(void *)) |
3263 | { | 3383 | { |
3264 | struct inode *inode = page->mapping->host; | 3384 | struct inode *inode = page->mapping->host; |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 05b3fab39f7e..95e147eea239 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -89,6 +89,12 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | |||
89 | int btrfs_cleanup_transaction(struct btrfs_root *root); | 89 | int btrfs_cleanup_transaction(struct btrfs_root *root); |
90 | void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans, | 90 | void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans, |
91 | struct btrfs_root *root); | 91 | struct btrfs_root *root); |
92 | void btrfs_abort_devices(struct btrfs_root *root); | ||
93 | struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, | ||
94 | struct btrfs_fs_info *fs_info, | ||
95 | u64 objectid); | ||
96 | int btree_lock_page_hook(struct page *page, void *data, | ||
97 | void (*flush_fn)(void *)); | ||
92 | 98 | ||
93 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 99 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
94 | void btrfs_init_lockdep(void); | 100 | void btrfs_init_lockdep(void); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 71b2d1c7da69..44f06201f376 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -34,6 +34,8 @@ | |||
34 | #include "locking.h" | 34 | #include "locking.h" |
35 | #include "free-space-cache.h" | 35 | #include "free-space-cache.h" |
36 | 36 | ||
37 | #undef SCRAMBLE_DELAYED_REFS | ||
38 | |||
37 | /* | 39 | /* |
38 | * control flags for do_chunk_alloc's force field | 40 | * control flags for do_chunk_alloc's force field |
39 | * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk | 41 | * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk |
@@ -2217,6 +2219,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
2217 | struct btrfs_delayed_ref_node *ref; | 2219 | struct btrfs_delayed_ref_node *ref; |
2218 | struct btrfs_delayed_ref_head *locked_ref = NULL; | 2220 | struct btrfs_delayed_ref_head *locked_ref = NULL; |
2219 | struct btrfs_delayed_extent_op *extent_op; | 2221 | struct btrfs_delayed_extent_op *extent_op; |
2222 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
2220 | int ret; | 2223 | int ret; |
2221 | int count = 0; | 2224 | int count = 0; |
2222 | int must_insert_reserved = 0; | 2225 | int must_insert_reserved = 0; |
@@ -2255,7 +2258,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
2255 | ref = select_delayed_ref(locked_ref); | 2258 | ref = select_delayed_ref(locked_ref); |
2256 | 2259 | ||
2257 | if (ref && ref->seq && | 2260 | if (ref && ref->seq && |
2258 | btrfs_check_delayed_seq(delayed_refs, ref->seq)) { | 2261 | btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) { |
2259 | /* | 2262 | /* |
2260 | * there are still refs with lower seq numbers in the | 2263 | * there are still refs with lower seq numbers in the |
2261 | * process of being added. Don't run this ref yet. | 2264 | * process of being added. Don't run this ref yet. |
@@ -2337,7 +2340,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
2337 | } | 2340 | } |
2338 | 2341 | ||
2339 | next: | 2342 | next: |
2340 | do_chunk_alloc(trans, root->fs_info->extent_root, | 2343 | do_chunk_alloc(trans, fs_info->extent_root, |
2341 | 2 * 1024 * 1024, | 2344 | 2 * 1024 * 1024, |
2342 | btrfs_get_alloc_profile(root, 0), | 2345 | btrfs_get_alloc_profile(root, 0), |
2343 | CHUNK_ALLOC_NO_FORCE); | 2346 | CHUNK_ALLOC_NO_FORCE); |
@@ -2347,21 +2350,99 @@ next: | |||
2347 | return count; | 2350 | return count; |
2348 | } | 2351 | } |
2349 | 2352 | ||
2350 | static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs, | 2353 | static void wait_for_more_refs(struct btrfs_fs_info *fs_info, |
2354 | struct btrfs_delayed_ref_root *delayed_refs, | ||
2351 | unsigned long num_refs, | 2355 | unsigned long num_refs, |
2352 | struct list_head *first_seq) | 2356 | struct list_head *first_seq) |
2353 | { | 2357 | { |
2354 | spin_unlock(&delayed_refs->lock); | 2358 | spin_unlock(&delayed_refs->lock); |
2355 | pr_debug("waiting for more refs (num %ld, first %p)\n", | 2359 | pr_debug("waiting for more refs (num %ld, first %p)\n", |
2356 | num_refs, first_seq); | 2360 | num_refs, first_seq); |
2357 | wait_event(delayed_refs->seq_wait, | 2361 | wait_event(fs_info->tree_mod_seq_wait, |
2358 | num_refs != delayed_refs->num_entries || | 2362 | num_refs != delayed_refs->num_entries || |
2359 | delayed_refs->seq_head.next != first_seq); | 2363 | fs_info->tree_mod_seq_list.next != first_seq); |
2360 | pr_debug("done waiting for more refs (num %ld, first %p)\n", | 2364 | pr_debug("done waiting for more refs (num %ld, first %p)\n", |
2361 | delayed_refs->num_entries, delayed_refs->seq_head.next); | 2365 | delayed_refs->num_entries, fs_info->tree_mod_seq_list.next); |
2362 | spin_lock(&delayed_refs->lock); | 2366 | spin_lock(&delayed_refs->lock); |
2363 | } | 2367 | } |
2364 | 2368 | ||
2369 | #ifdef SCRAMBLE_DELAYED_REFS | ||
2370 | /* | ||
2371 | * Normally delayed refs get processed in ascending bytenr order. This | ||
2372 | * correlates in most cases to the order added. To expose dependencies on this | ||
2373 | * order, we start to process the tree in the middle instead of the beginning | ||
2374 | */ | ||
2375 | static u64 find_middle(struct rb_root *root) | ||
2376 | { | ||
2377 | struct rb_node *n = root->rb_node; | ||
2378 | struct btrfs_delayed_ref_node *entry; | ||
2379 | int alt = 1; | ||
2380 | u64 middle; | ||
2381 | u64 first = 0, last = 0; | ||
2382 | |||
2383 | n = rb_first(root); | ||
2384 | if (n) { | ||
2385 | entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); | ||
2386 | first = entry->bytenr; | ||
2387 | } | ||
2388 | n = rb_last(root); | ||
2389 | if (n) { | ||
2390 | entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); | ||
2391 | last = entry->bytenr; | ||
2392 | } | ||
2393 | n = root->rb_node; | ||
2394 | |||
2395 | while (n) { | ||
2396 | entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); | ||
2397 | WARN_ON(!entry->in_tree); | ||
2398 | |||
2399 | middle = entry->bytenr; | ||
2400 | |||
2401 | if (alt) | ||
2402 | n = n->rb_left; | ||
2403 | else | ||
2404 | n = n->rb_right; | ||
2405 | |||
2406 | alt = 1 - alt; | ||
2407 | } | ||
2408 | return middle; | ||
2409 | } | ||
2410 | #endif | ||
2411 | |||
2412 | int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, | ||
2413 | struct btrfs_fs_info *fs_info) | ||
2414 | { | ||
2415 | struct qgroup_update *qgroup_update; | ||
2416 | int ret = 0; | ||
2417 | |||
2418 | if (list_empty(&trans->qgroup_ref_list) != | ||
2419 | !trans->delayed_ref_elem.seq) { | ||
2420 | /* list without seq or seq without list */ | ||
2421 | printk(KERN_ERR "btrfs: qgroup accounting update error, list is%s empty, seq is %llu\n", | ||
2422 | list_empty(&trans->qgroup_ref_list) ? "" : " not", | ||
2423 | trans->delayed_ref_elem.seq); | ||
2424 | BUG(); | ||
2425 | } | ||
2426 | |||
2427 | if (!trans->delayed_ref_elem.seq) | ||
2428 | return 0; | ||
2429 | |||
2430 | while (!list_empty(&trans->qgroup_ref_list)) { | ||
2431 | qgroup_update = list_first_entry(&trans->qgroup_ref_list, | ||
2432 | struct qgroup_update, list); | ||
2433 | list_del(&qgroup_update->list); | ||
2434 | if (!ret) | ||
2435 | ret = btrfs_qgroup_account_ref( | ||
2436 | trans, fs_info, qgroup_update->node, | ||
2437 | qgroup_update->extent_op); | ||
2438 | kfree(qgroup_update); | ||
2439 | } | ||
2440 | |||
2441 | btrfs_put_tree_mod_seq(fs_info, &trans->delayed_ref_elem); | ||
2442 | |||
2443 | return ret; | ||
2444 | } | ||
2445 | |||
2365 | /* | 2446 | /* |
2366 | * this starts processing the delayed reference count updates and | 2447 | * this starts processing the delayed reference count updates and |
2367 | * extent insertions we have queued up so far. count can be | 2448 | * extent insertions we have queued up so far. count can be |
@@ -2398,11 +2479,18 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | |||
2398 | 2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0), | 2479 | 2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0), |
2399 | CHUNK_ALLOC_NO_FORCE); | 2480 | CHUNK_ALLOC_NO_FORCE); |
2400 | 2481 | ||
2482 | btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); | ||
2483 | |||
2401 | delayed_refs = &trans->transaction->delayed_refs; | 2484 | delayed_refs = &trans->transaction->delayed_refs; |
2402 | INIT_LIST_HEAD(&cluster); | 2485 | INIT_LIST_HEAD(&cluster); |
2403 | again: | 2486 | again: |
2404 | consider_waiting = 0; | 2487 | consider_waiting = 0; |
2405 | spin_lock(&delayed_refs->lock); | 2488 | spin_lock(&delayed_refs->lock); |
2489 | |||
2490 | #ifdef SCRAMBLE_DELAYED_REFS | ||
2491 | delayed_refs->run_delayed_start = find_middle(&delayed_refs->root); | ||
2492 | #endif | ||
2493 | |||
2406 | if (count == 0) { | 2494 | if (count == 0) { |
2407 | count = delayed_refs->num_entries * 2; | 2495 | count = delayed_refs->num_entries * 2; |
2408 | run_most = 1; | 2496 | run_most = 1; |
@@ -2437,7 +2525,7 @@ again: | |||
2437 | num_refs = delayed_refs->num_entries; | 2525 | num_refs = delayed_refs->num_entries; |
2438 | first_seq = root->fs_info->tree_mod_seq_list.next; | 2526 | first_seq = root->fs_info->tree_mod_seq_list.next; |
2439 | } else { | 2527 | } else { |
2440 | wait_for_more_refs(delayed_refs, | 2528 | wait_for_more_refs(root->fs_info, delayed_refs, |
2441 | num_refs, first_seq); | 2529 | num_refs, first_seq); |
2442 | /* | 2530 | /* |
2443 | * after waiting, things have changed. we | 2531 | * after waiting, things have changed. we |
@@ -2502,6 +2590,7 @@ again: | |||
2502 | } | 2590 | } |
2503 | out: | 2591 | out: |
2504 | spin_unlock(&delayed_refs->lock); | 2592 | spin_unlock(&delayed_refs->lock); |
2593 | assert_qgroups_uptodate(trans); | ||
2505 | return 0; | 2594 | return 0; |
2506 | } | 2595 | } |
2507 | 2596 | ||
@@ -4479,6 +4568,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
4479 | csum_bytes = BTRFS_I(inode)->csum_bytes; | 4568 | csum_bytes = BTRFS_I(inode)->csum_bytes; |
4480 | spin_unlock(&BTRFS_I(inode)->lock); | 4569 | spin_unlock(&BTRFS_I(inode)->lock); |
4481 | 4570 | ||
4571 | if (root->fs_info->quota_enabled) { | ||
4572 | ret = btrfs_qgroup_reserve(root, num_bytes + | ||
4573 | nr_extents * root->leafsize); | ||
4574 | if (ret) | ||
4575 | return ret; | ||
4576 | } | ||
4577 | |||
4482 | ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); | 4578 | ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); |
4483 | if (ret) { | 4579 | if (ret) { |
4484 | u64 to_free = 0; | 4580 | u64 to_free = 0; |
@@ -4557,6 +4653,11 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | |||
4557 | 4653 | ||
4558 | trace_btrfs_space_reservation(root->fs_info, "delalloc", | 4654 | trace_btrfs_space_reservation(root->fs_info, "delalloc", |
4559 | btrfs_ino(inode), to_free, 0); | 4655 | btrfs_ino(inode), to_free, 0); |
4656 | if (root->fs_info->quota_enabled) { | ||
4657 | btrfs_qgroup_free(root, num_bytes + | ||
4658 | dropped * root->leafsize); | ||
4659 | } | ||
4660 | |||
4560 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, | 4661 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, |
4561 | to_free); | 4662 | to_free); |
4562 | } | 4663 | } |
@@ -5193,8 +5294,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, | |||
5193 | rb_erase(&head->node.rb_node, &delayed_refs->root); | 5294 | rb_erase(&head->node.rb_node, &delayed_refs->root); |
5194 | 5295 | ||
5195 | delayed_refs->num_entries--; | 5296 | delayed_refs->num_entries--; |
5196 | if (waitqueue_active(&delayed_refs->seq_wait)) | 5297 | if (waitqueue_active(&root->fs_info->tree_mod_seq_wait)) |
5197 | wake_up(&delayed_refs->seq_wait); | 5298 | wake_up(&root->fs_info->tree_mod_seq_wait); |
5198 | 5299 | ||
5199 | /* | 5300 | /* |
5200 | * we don't take a ref on the node because we're removing it from the | 5301 | * we don't take a ref on the node because we're removing it from the |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 17facea6a51c..e54b663fd3aa 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -336,7 +336,8 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) | |||
336 | static noinline int create_subvol(struct btrfs_root *root, | 336 | static noinline int create_subvol(struct btrfs_root *root, |
337 | struct dentry *dentry, | 337 | struct dentry *dentry, |
338 | char *name, int namelen, | 338 | char *name, int namelen, |
339 | u64 *async_transid) | 339 | u64 *async_transid, |
340 | struct btrfs_qgroup_inherit **inherit) | ||
340 | { | 341 | { |
341 | struct btrfs_trans_handle *trans; | 342 | struct btrfs_trans_handle *trans; |
342 | struct btrfs_key key; | 343 | struct btrfs_key key; |
@@ -368,6 +369,11 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
368 | if (IS_ERR(trans)) | 369 | if (IS_ERR(trans)) |
369 | return PTR_ERR(trans); | 370 | return PTR_ERR(trans); |
370 | 371 | ||
372 | ret = btrfs_qgroup_inherit(trans, root->fs_info, 0, objectid, | ||
373 | inherit ? *inherit : NULL); | ||
374 | if (ret) | ||
375 | goto fail; | ||
376 | |||
371 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, | 377 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, |
372 | 0, objectid, NULL, 0, 0, 0); | 378 | 0, objectid, NULL, 0, 0, 0); |
373 | if (IS_ERR(leaf)) { | 379 | if (IS_ERR(leaf)) { |
@@ -484,7 +490,7 @@ fail: | |||
484 | 490 | ||
485 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | 491 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, |
486 | char *name, int namelen, u64 *async_transid, | 492 | char *name, int namelen, u64 *async_transid, |
487 | bool readonly) | 493 | bool readonly, struct btrfs_qgroup_inherit **inherit) |
488 | { | 494 | { |
489 | struct inode *inode; | 495 | struct inode *inode; |
490 | struct btrfs_pending_snapshot *pending_snapshot; | 496 | struct btrfs_pending_snapshot *pending_snapshot; |
@@ -502,6 +508,10 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
502 | pending_snapshot->dentry = dentry; | 508 | pending_snapshot->dentry = dentry; |
503 | pending_snapshot->root = root; | 509 | pending_snapshot->root = root; |
504 | pending_snapshot->readonly = readonly; | 510 | pending_snapshot->readonly = readonly; |
511 | if (inherit) { | ||
512 | pending_snapshot->inherit = *inherit; | ||
513 | *inherit = NULL; /* take responsibility to free it */ | ||
514 | } | ||
505 | 515 | ||
506 | trans = btrfs_start_transaction(root->fs_info->extent_root, 5); | 516 | trans = btrfs_start_transaction(root->fs_info->extent_root, 5); |
507 | if (IS_ERR(trans)) { | 517 | if (IS_ERR(trans)) { |
@@ -635,7 +645,8 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child) | |||
635 | static noinline int btrfs_mksubvol(struct path *parent, | 645 | static noinline int btrfs_mksubvol(struct path *parent, |
636 | char *name, int namelen, | 646 | char *name, int namelen, |
637 | struct btrfs_root *snap_src, | 647 | struct btrfs_root *snap_src, |
638 | u64 *async_transid, bool readonly) | 648 | u64 *async_transid, bool readonly, |
649 | struct btrfs_qgroup_inherit **inherit) | ||
639 | { | 650 | { |
640 | struct inode *dir = parent->dentry->d_inode; | 651 | struct inode *dir = parent->dentry->d_inode; |
641 | struct dentry *dentry; | 652 | struct dentry *dentry; |
@@ -662,11 +673,11 @@ static noinline int btrfs_mksubvol(struct path *parent, | |||
662 | goto out_up_read; | 673 | goto out_up_read; |
663 | 674 | ||
664 | if (snap_src) { | 675 | if (snap_src) { |
665 | error = create_snapshot(snap_src, dentry, | 676 | error = create_snapshot(snap_src, dentry, name, namelen, |
666 | name, namelen, async_transid, readonly); | 677 | async_transid, readonly, inherit); |
667 | } else { | 678 | } else { |
668 | error = create_subvol(BTRFS_I(dir)->root, dentry, | 679 | error = create_subvol(BTRFS_I(dir)->root, dentry, |
669 | name, namelen, async_transid); | 680 | name, namelen, async_transid, inherit); |
670 | } | 681 | } |
671 | if (!error) | 682 | if (!error) |
672 | fsnotify_mkdir(dir, dentry); | 683 | fsnotify_mkdir(dir, dentry); |
@@ -1375,11 +1386,9 @@ out: | |||
1375 | } | 1386 | } |
1376 | 1387 | ||
1377 | static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | 1388 | static noinline int btrfs_ioctl_snap_create_transid(struct file *file, |
1378 | char *name, | 1389 | char *name, unsigned long fd, int subvol, |
1379 | unsigned long fd, | 1390 | u64 *transid, bool readonly, |
1380 | int subvol, | 1391 | struct btrfs_qgroup_inherit **inherit) |
1381 | u64 *transid, | ||
1382 | bool readonly) | ||
1383 | { | 1392 | { |
1384 | struct file *src_file; | 1393 | struct file *src_file; |
1385 | int namelen; | 1394 | int namelen; |
@@ -1403,7 +1412,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
1403 | 1412 | ||
1404 | if (subvol) { | 1413 | if (subvol) { |
1405 | ret = btrfs_mksubvol(&file->f_path, name, namelen, | 1414 | ret = btrfs_mksubvol(&file->f_path, name, namelen, |
1406 | NULL, transid, readonly); | 1415 | NULL, transid, readonly, inherit); |
1407 | } else { | 1416 | } else { |
1408 | struct inode *src_inode; | 1417 | struct inode *src_inode; |
1409 | src_file = fget(fd); | 1418 | src_file = fget(fd); |
@@ -1422,7 +1431,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
1422 | } | 1431 | } |
1423 | ret = btrfs_mksubvol(&file->f_path, name, namelen, | 1432 | ret = btrfs_mksubvol(&file->f_path, name, namelen, |
1424 | BTRFS_I(src_inode)->root, | 1433 | BTRFS_I(src_inode)->root, |
1425 | transid, readonly); | 1434 | transid, readonly, inherit); |
1426 | fput(src_file); | 1435 | fput(src_file); |
1427 | } | 1436 | } |
1428 | out_drop_write: | 1437 | out_drop_write: |
@@ -1444,7 +1453,7 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, | |||
1444 | 1453 | ||
1445 | ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, | 1454 | ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, |
1446 | vol_args->fd, subvol, | 1455 | vol_args->fd, subvol, |
1447 | NULL, false); | 1456 | NULL, false, NULL); |
1448 | 1457 | ||
1449 | kfree(vol_args); | 1458 | kfree(vol_args); |
1450 | return ret; | 1459 | return ret; |
@@ -1458,6 +1467,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, | |||
1458 | u64 transid = 0; | 1467 | u64 transid = 0; |
1459 | u64 *ptr = NULL; | 1468 | u64 *ptr = NULL; |
1460 | bool readonly = false; | 1469 | bool readonly = false; |
1470 | struct btrfs_qgroup_inherit *inherit = NULL; | ||
1461 | 1471 | ||
1462 | vol_args = memdup_user(arg, sizeof(*vol_args)); | 1472 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
1463 | if (IS_ERR(vol_args)) | 1473 | if (IS_ERR(vol_args)) |
@@ -1465,7 +1475,8 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, | |||
1465 | vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; | 1475 | vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; |
1466 | 1476 | ||
1467 | if (vol_args->flags & | 1477 | if (vol_args->flags & |
1468 | ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY)) { | 1478 | ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY | |
1479 | BTRFS_SUBVOL_QGROUP_INHERIT)) { | ||
1469 | ret = -EOPNOTSUPP; | 1480 | ret = -EOPNOTSUPP; |
1470 | goto out; | 1481 | goto out; |
1471 | } | 1482 | } |
@@ -1474,10 +1485,21 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, | |||
1474 | ptr = &transid; | 1485 | ptr = &transid; |
1475 | if (vol_args->flags & BTRFS_SUBVOL_RDONLY) | 1486 | if (vol_args->flags & BTRFS_SUBVOL_RDONLY) |
1476 | readonly = true; | 1487 | readonly = true; |
1488 | if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) { | ||
1489 | if (vol_args->size > PAGE_CACHE_SIZE) { | ||
1490 | ret = -EINVAL; | ||
1491 | goto out; | ||
1492 | } | ||
1493 | inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size); | ||
1494 | if (IS_ERR(inherit)) { | ||
1495 | ret = PTR_ERR(inherit); | ||
1496 | goto out; | ||
1497 | } | ||
1498 | } | ||
1477 | 1499 | ||
1478 | ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, | 1500 | ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, |
1479 | vol_args->fd, subvol, | 1501 | vol_args->fd, subvol, ptr, |
1480 | ptr, readonly); | 1502 | readonly, &inherit); |
1481 | 1503 | ||
1482 | if (ret == 0 && ptr && | 1504 | if (ret == 0 && ptr && |
1483 | copy_to_user(arg + | 1505 | copy_to_user(arg + |
@@ -1486,6 +1508,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, | |||
1486 | ret = -EFAULT; | 1508 | ret = -EFAULT; |
1487 | out: | 1509 | out: |
1488 | kfree(vol_args); | 1510 | kfree(vol_args); |
1511 | kfree(inherit); | ||
1489 | return ret; | 1512 | return ret; |
1490 | } | 1513 | } |
1491 | 1514 | ||
@@ -3401,6 +3424,183 @@ out: | |||
3401 | return ret; | 3424 | return ret; |
3402 | } | 3425 | } |
3403 | 3426 | ||
3427 | static long btrfs_ioctl_quota_ctl(struct btrfs_root *root, void __user *arg) | ||
3428 | { | ||
3429 | struct btrfs_ioctl_quota_ctl_args *sa; | ||
3430 | struct btrfs_trans_handle *trans = NULL; | ||
3431 | int ret; | ||
3432 | int err; | ||
3433 | |||
3434 | if (!capable(CAP_SYS_ADMIN)) | ||
3435 | return -EPERM; | ||
3436 | |||
3437 | if (root->fs_info->sb->s_flags & MS_RDONLY) | ||
3438 | return -EROFS; | ||
3439 | |||
3440 | sa = memdup_user(arg, sizeof(*sa)); | ||
3441 | if (IS_ERR(sa)) | ||
3442 | return PTR_ERR(sa); | ||
3443 | |||
3444 | if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) { | ||
3445 | trans = btrfs_start_transaction(root, 2); | ||
3446 | if (IS_ERR(trans)) { | ||
3447 | ret = PTR_ERR(trans); | ||
3448 | goto out; | ||
3449 | } | ||
3450 | } | ||
3451 | |||
3452 | switch (sa->cmd) { | ||
3453 | case BTRFS_QUOTA_CTL_ENABLE: | ||
3454 | ret = btrfs_quota_enable(trans, root->fs_info); | ||
3455 | break; | ||
3456 | case BTRFS_QUOTA_CTL_DISABLE: | ||
3457 | ret = btrfs_quota_disable(trans, root->fs_info); | ||
3458 | break; | ||
3459 | case BTRFS_QUOTA_CTL_RESCAN: | ||
3460 | ret = btrfs_quota_rescan(root->fs_info); | ||
3461 | break; | ||
3462 | default: | ||
3463 | ret = -EINVAL; | ||
3464 | break; | ||
3465 | } | ||
3466 | |||
3467 | if (copy_to_user(arg, sa, sizeof(*sa))) | ||
3468 | ret = -EFAULT; | ||
3469 | |||
3470 | if (trans) { | ||
3471 | err = btrfs_commit_transaction(trans, root); | ||
3472 | if (err && !ret) | ||
3473 | ret = err; | ||
3474 | } | ||
3475 | |||
3476 | out: | ||
3477 | kfree(sa); | ||
3478 | return ret; | ||
3479 | } | ||
3480 | |||
3481 | static long btrfs_ioctl_qgroup_assign(struct btrfs_root *root, void __user *arg) | ||
3482 | { | ||
3483 | struct btrfs_ioctl_qgroup_assign_args *sa; | ||
3484 | struct btrfs_trans_handle *trans; | ||
3485 | int ret; | ||
3486 | int err; | ||
3487 | |||
3488 | if (!capable(CAP_SYS_ADMIN)) | ||
3489 | return -EPERM; | ||
3490 | |||
3491 | if (root->fs_info->sb->s_flags & MS_RDONLY) | ||
3492 | return -EROFS; | ||
3493 | |||
3494 | sa = memdup_user(arg, sizeof(*sa)); | ||
3495 | if (IS_ERR(sa)) | ||
3496 | return PTR_ERR(sa); | ||
3497 | |||
3498 | trans = btrfs_join_transaction(root); | ||
3499 | if (IS_ERR(trans)) { | ||
3500 | ret = PTR_ERR(trans); | ||
3501 | goto out; | ||
3502 | } | ||
3503 | |||
3504 | /* FIXME: check if the IDs really exist */ | ||
3505 | if (sa->assign) { | ||
3506 | ret = btrfs_add_qgroup_relation(trans, root->fs_info, | ||
3507 | sa->src, sa->dst); | ||
3508 | } else { | ||
3509 | ret = btrfs_del_qgroup_relation(trans, root->fs_info, | ||
3510 | sa->src, sa->dst); | ||
3511 | } | ||
3512 | |||
3513 | err = btrfs_end_transaction(trans, root); | ||
3514 | if (err && !ret) | ||
3515 | ret = err; | ||
3516 | |||
3517 | out: | ||
3518 | kfree(sa); | ||
3519 | return ret; | ||
3520 | } | ||
3521 | |||
3522 | static long btrfs_ioctl_qgroup_create(struct btrfs_root *root, void __user *arg) | ||
3523 | { | ||
3524 | struct btrfs_ioctl_qgroup_create_args *sa; | ||
3525 | struct btrfs_trans_handle *trans; | ||
3526 | int ret; | ||
3527 | int err; | ||
3528 | |||
3529 | if (!capable(CAP_SYS_ADMIN)) | ||
3530 | return -EPERM; | ||
3531 | |||
3532 | if (root->fs_info->sb->s_flags & MS_RDONLY) | ||
3533 | return -EROFS; | ||
3534 | |||
3535 | sa = memdup_user(arg, sizeof(*sa)); | ||
3536 | if (IS_ERR(sa)) | ||
3537 | return PTR_ERR(sa); | ||
3538 | |||
3539 | trans = btrfs_join_transaction(root); | ||
3540 | if (IS_ERR(trans)) { | ||
3541 | ret = PTR_ERR(trans); | ||
3542 | goto out; | ||
3543 | } | ||
3544 | |||
3545 | /* FIXME: check if the IDs really exist */ | ||
3546 | if (sa->create) { | ||
3547 | ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid, | ||
3548 | NULL); | ||
3549 | } else { | ||
3550 | ret = btrfs_remove_qgroup(trans, root->fs_info, sa->qgroupid); | ||
3551 | } | ||
3552 | |||
3553 | err = btrfs_end_transaction(trans, root); | ||
3554 | if (err && !ret) | ||
3555 | ret = err; | ||
3556 | |||
3557 | out: | ||
3558 | kfree(sa); | ||
3559 | return ret; | ||
3560 | } | ||
3561 | |||
3562 | static long btrfs_ioctl_qgroup_limit(struct btrfs_root *root, void __user *arg) | ||
3563 | { | ||
3564 | struct btrfs_ioctl_qgroup_limit_args *sa; | ||
3565 | struct btrfs_trans_handle *trans; | ||
3566 | int ret; | ||
3567 | int err; | ||
3568 | u64 qgroupid; | ||
3569 | |||
3570 | if (!capable(CAP_SYS_ADMIN)) | ||
3571 | return -EPERM; | ||
3572 | |||
3573 | if (root->fs_info->sb->s_flags & MS_RDONLY) | ||
3574 | return -EROFS; | ||
3575 | |||
3576 | sa = memdup_user(arg, sizeof(*sa)); | ||
3577 | if (IS_ERR(sa)) | ||
3578 | return PTR_ERR(sa); | ||
3579 | |||
3580 | trans = btrfs_join_transaction(root); | ||
3581 | if (IS_ERR(trans)) { | ||
3582 | ret = PTR_ERR(trans); | ||
3583 | goto out; | ||
3584 | } | ||
3585 | |||
3586 | qgroupid = sa->qgroupid; | ||
3587 | if (!qgroupid) { | ||
3588 | /* take the current subvol as qgroup */ | ||
3589 | qgroupid = root->root_key.objectid; | ||
3590 | } | ||
3591 | |||
3592 | /* FIXME: check if the IDs really exist */ | ||
3593 | ret = btrfs_limit_qgroup(trans, root->fs_info, qgroupid, &sa->lim); | ||
3594 | |||
3595 | err = btrfs_end_transaction(trans, root); | ||
3596 | if (err && !ret) | ||
3597 | ret = err; | ||
3598 | |||
3599 | out: | ||
3600 | kfree(sa); | ||
3601 | return ret; | ||
3602 | } | ||
3603 | |||
3404 | long btrfs_ioctl(struct file *file, unsigned int | 3604 | long btrfs_ioctl(struct file *file, unsigned int |
3405 | cmd, unsigned long arg) | 3605 | cmd, unsigned long arg) |
3406 | { | 3606 | { |
@@ -3422,6 +3622,8 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
3422 | return btrfs_ioctl_snap_create_v2(file, argp, 0); | 3622 | return btrfs_ioctl_snap_create_v2(file, argp, 0); |
3423 | case BTRFS_IOC_SUBVOL_CREATE: | 3623 | case BTRFS_IOC_SUBVOL_CREATE: |
3424 | return btrfs_ioctl_snap_create(file, argp, 1); | 3624 | return btrfs_ioctl_snap_create(file, argp, 1); |
3625 | case BTRFS_IOC_SUBVOL_CREATE_V2: | ||
3626 | return btrfs_ioctl_snap_create_v2(file, argp, 1); | ||
3425 | case BTRFS_IOC_SNAP_DESTROY: | 3627 | case BTRFS_IOC_SNAP_DESTROY: |
3426 | return btrfs_ioctl_snap_destroy(file, argp); | 3628 | return btrfs_ioctl_snap_destroy(file, argp); |
3427 | case BTRFS_IOC_SUBVOL_GETFLAGS: | 3629 | case BTRFS_IOC_SUBVOL_GETFLAGS: |
@@ -3485,6 +3687,14 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
3485 | return btrfs_ioctl_balance_progress(root, argp); | 3687 | return btrfs_ioctl_balance_progress(root, argp); |
3486 | case BTRFS_IOC_GET_DEV_STATS: | 3688 | case BTRFS_IOC_GET_DEV_STATS: |
3487 | return btrfs_ioctl_get_dev_stats(root, argp); | 3689 | return btrfs_ioctl_get_dev_stats(root, argp); |
3690 | case BTRFS_IOC_QUOTA_CTL: | ||
3691 | return btrfs_ioctl_quota_ctl(root, argp); | ||
3692 | case BTRFS_IOC_QGROUP_ASSIGN: | ||
3693 | return btrfs_ioctl_qgroup_assign(root, argp); | ||
3694 | case BTRFS_IOC_QGROUP_CREATE: | ||
3695 | return btrfs_ioctl_qgroup_create(root, argp); | ||
3696 | case BTRFS_IOC_QGROUP_LIMIT: | ||
3697 | return btrfs_ioctl_qgroup_limit(root, argp); | ||
3488 | } | 3698 | } |
3489 | 3699 | ||
3490 | return -ENOTTY; | 3700 | return -ENOTTY; |
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 4e3e5d342a2b..3f9701d571ea 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h | |||
@@ -32,15 +32,46 @@ struct btrfs_ioctl_vol_args { | |||
32 | 32 | ||
33 | #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) | 33 | #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) |
34 | #define BTRFS_SUBVOL_RDONLY (1ULL << 1) | 34 | #define BTRFS_SUBVOL_RDONLY (1ULL << 1) |
35 | #define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) | ||
35 | #define BTRFS_FSID_SIZE 16 | 36 | #define BTRFS_FSID_SIZE 16 |
36 | #define BTRFS_UUID_SIZE 16 | 37 | #define BTRFS_UUID_SIZE 16 |
37 | 38 | ||
39 | #define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0) | ||
40 | |||
41 | struct btrfs_qgroup_limit { | ||
42 | __u64 flags; | ||
43 | __u64 max_rfer; | ||
44 | __u64 max_excl; | ||
45 | __u64 rsv_rfer; | ||
46 | __u64 rsv_excl; | ||
47 | }; | ||
48 | |||
49 | struct btrfs_qgroup_inherit { | ||
50 | __u64 flags; | ||
51 | __u64 num_qgroups; | ||
52 | __u64 num_ref_copies; | ||
53 | __u64 num_excl_copies; | ||
54 | struct btrfs_qgroup_limit lim; | ||
55 | __u64 qgroups[0]; | ||
56 | }; | ||
57 | |||
58 | struct btrfs_ioctl_qgroup_limit_args { | ||
59 | __u64 qgroupid; | ||
60 | struct btrfs_qgroup_limit lim; | ||
61 | }; | ||
62 | |||
38 | #define BTRFS_SUBVOL_NAME_MAX 4039 | 63 | #define BTRFS_SUBVOL_NAME_MAX 4039 |
39 | struct btrfs_ioctl_vol_args_v2 { | 64 | struct btrfs_ioctl_vol_args_v2 { |
40 | __s64 fd; | 65 | __s64 fd; |
41 | __u64 transid; | 66 | __u64 transid; |
42 | __u64 flags; | 67 | __u64 flags; |
43 | __u64 unused[4]; | 68 | union { |
69 | struct { | ||
70 | __u64 size; | ||
71 | struct btrfs_qgroup_inherit __user *qgroup_inherit; | ||
72 | }; | ||
73 | __u64 unused[4]; | ||
74 | }; | ||
44 | char name[BTRFS_SUBVOL_NAME_MAX + 1]; | 75 | char name[BTRFS_SUBVOL_NAME_MAX + 1]; |
45 | }; | 76 | }; |
46 | 77 | ||
@@ -299,6 +330,25 @@ struct btrfs_ioctl_get_dev_stats { | |||
299 | __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */ | 330 | __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */ |
300 | }; | 331 | }; |
301 | 332 | ||
333 | #define BTRFS_QUOTA_CTL_ENABLE 1 | ||
334 | #define BTRFS_QUOTA_CTL_DISABLE 2 | ||
335 | #define BTRFS_QUOTA_CTL_RESCAN 3 | ||
336 | struct btrfs_ioctl_quota_ctl_args { | ||
337 | __u64 cmd; | ||
338 | __u64 status; | ||
339 | }; | ||
340 | |||
341 | struct btrfs_ioctl_qgroup_assign_args { | ||
342 | __u64 assign; | ||
343 | __u64 src; | ||
344 | __u64 dst; | ||
345 | }; | ||
346 | |||
347 | struct btrfs_ioctl_qgroup_create_args { | ||
348 | __u64 create; | ||
349 | __u64 qgroupid; | ||
350 | }; | ||
351 | |||
302 | #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ | 352 | #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ |
303 | struct btrfs_ioctl_vol_args) | 353 | struct btrfs_ioctl_vol_args) |
304 | #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ | 354 | #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ |
@@ -343,6 +393,8 @@ struct btrfs_ioctl_get_dev_stats { | |||
343 | #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) | 393 | #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) |
344 | #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ | 394 | #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ |
345 | struct btrfs_ioctl_vol_args_v2) | 395 | struct btrfs_ioctl_vol_args_v2) |
396 | #define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \ | ||
397 | struct btrfs_ioctl_vol_args_v2) | ||
346 | #define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64) | 398 | #define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64) |
347 | #define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) | 399 | #define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) |
348 | #define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ | 400 | #define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ |
@@ -365,6 +417,14 @@ struct btrfs_ioctl_get_dev_stats { | |||
365 | struct btrfs_ioctl_ino_path_args) | 417 | struct btrfs_ioctl_ino_path_args) |
366 | #define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \ | 418 | #define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \ |
367 | struct btrfs_ioctl_vol_args) | 419 | struct btrfs_ioctl_vol_args) |
420 | #define BTRFS_IOC_QUOTA_CTL _IOWR(BTRFS_IOCTL_MAGIC, 40, \ | ||
421 | struct btrfs_ioctl_quota_ctl_args) | ||
422 | #define BTRFS_IOC_QGROUP_ASSIGN _IOW(BTRFS_IOCTL_MAGIC, 41, \ | ||
423 | struct btrfs_ioctl_qgroup_assign_args) | ||
424 | #define BTRFS_IOC_QGROUP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 42, \ | ||
425 | struct btrfs_ioctl_qgroup_create_args) | ||
426 | #define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \ | ||
427 | struct btrfs_ioctl_qgroup_limit_args) | ||
368 | #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ | 428 | #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ |
369 | struct btrfs_ioctl_get_dev_stats) | 429 | struct btrfs_ioctl_get_dev_stats) |
370 | #endif | 430 | #endif |
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c new file mode 100644 index 000000000000..bc424ae5a81a --- /dev/null +++ b/fs/btrfs/qgroup.c | |||
@@ -0,0 +1,1571 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2011 STRATO. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #include <linux/sched.h> | ||
20 | #include <linux/pagemap.h> | ||
21 | #include <linux/writeback.h> | ||
22 | #include <linux/blkdev.h> | ||
23 | #include <linux/rbtree.h> | ||
24 | #include <linux/slab.h> | ||
25 | #include <linux/workqueue.h> | ||
26 | |||
27 | #include "ctree.h" | ||
28 | #include "transaction.h" | ||
29 | #include "disk-io.h" | ||
30 | #include "locking.h" | ||
31 | #include "ulist.h" | ||
32 | #include "ioctl.h" | ||
33 | #include "backref.h" | ||
34 | |||
35 | /* TODO XXX FIXME | ||
36 | * - subvol delete -> delete when ref goes to 0? delete limits also? | ||
37 | * - reorganize keys | ||
38 | * - compressed | ||
39 | * - sync | ||
40 | * - rescan | ||
41 | * - copy also limits on subvol creation | ||
42 | * - limit | ||
43 | * - caches for ulists | ||
44 | * - performance benchmarks | ||
45 | * - check all ioctl parameters | ||
46 | */ | ||
47 | |||
48 | /* | ||
49 | * one struct for each qgroup, organized in fs_info->qgroup_tree. | ||
50 | */ | ||
51 | struct btrfs_qgroup { | ||
52 | u64 qgroupid; | ||
53 | |||
54 | /* | ||
55 | * state | ||
56 | */ | ||
57 | u64 rfer; /* referenced */ | ||
58 | u64 rfer_cmpr; /* referenced compressed */ | ||
59 | u64 excl; /* exclusive */ | ||
60 | u64 excl_cmpr; /* exclusive compressed */ | ||
61 | |||
62 | /* | ||
63 | * limits | ||
64 | */ | ||
65 | u64 lim_flags; /* which limits are set */ | ||
66 | u64 max_rfer; | ||
67 | u64 max_excl; | ||
68 | u64 rsv_rfer; | ||
69 | u64 rsv_excl; | ||
70 | |||
71 | /* | ||
72 | * reservation tracking | ||
73 | */ | ||
74 | u64 reserved; | ||
75 | |||
76 | /* | ||
77 | * lists | ||
78 | */ | ||
79 | struct list_head groups; /* groups this group is member of */ | ||
80 | struct list_head members; /* groups that are members of this group */ | ||
81 | struct list_head dirty; /* dirty groups */ | ||
82 | struct rb_node node; /* tree of qgroups */ | ||
83 | |||
84 | /* | ||
85 | * temp variables for accounting operations | ||
86 | */ | ||
87 | u64 tag; | ||
88 | u64 refcnt; | ||
89 | }; | ||
90 | |||
91 | /* | ||
92 | * glue structure to represent the relations between qgroups. | ||
93 | */ | ||
94 | struct btrfs_qgroup_list { | ||
95 | struct list_head next_group; | ||
96 | struct list_head next_member; | ||
97 | struct btrfs_qgroup *group; | ||
98 | struct btrfs_qgroup *member; | ||
99 | }; | ||
100 | |||
101 | /* must be called with qgroup_lock held */ | ||
102 | static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, | ||
103 | u64 qgroupid) | ||
104 | { | ||
105 | struct rb_node *n = fs_info->qgroup_tree.rb_node; | ||
106 | struct btrfs_qgroup *qgroup; | ||
107 | |||
108 | while (n) { | ||
109 | qgroup = rb_entry(n, struct btrfs_qgroup, node); | ||
110 | if (qgroup->qgroupid < qgroupid) | ||
111 | n = n->rb_left; | ||
112 | else if (qgroup->qgroupid > qgroupid) | ||
113 | n = n->rb_right; | ||
114 | else | ||
115 | return qgroup; | ||
116 | } | ||
117 | return NULL; | ||
118 | } | ||
119 | |||
120 | /* must be called with qgroup_lock held */ | ||
121 | static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info, | ||
122 | u64 qgroupid) | ||
123 | { | ||
124 | struct rb_node **p = &fs_info->qgroup_tree.rb_node; | ||
125 | struct rb_node *parent = NULL; | ||
126 | struct btrfs_qgroup *qgroup; | ||
127 | |||
128 | while (*p) { | ||
129 | parent = *p; | ||
130 | qgroup = rb_entry(parent, struct btrfs_qgroup, node); | ||
131 | |||
132 | if (qgroup->qgroupid < qgroupid) | ||
133 | p = &(*p)->rb_left; | ||
134 | else if (qgroup->qgroupid > qgroupid) | ||
135 | p = &(*p)->rb_right; | ||
136 | else | ||
137 | return qgroup; | ||
138 | } | ||
139 | |||
140 | qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC); | ||
141 | if (!qgroup) | ||
142 | return ERR_PTR(-ENOMEM); | ||
143 | |||
144 | qgroup->qgroupid = qgroupid; | ||
145 | INIT_LIST_HEAD(&qgroup->groups); | ||
146 | INIT_LIST_HEAD(&qgroup->members); | ||
147 | INIT_LIST_HEAD(&qgroup->dirty); | ||
148 | |||
149 | rb_link_node(&qgroup->node, parent, p); | ||
150 | rb_insert_color(&qgroup->node, &fs_info->qgroup_tree); | ||
151 | |||
152 | return qgroup; | ||
153 | } | ||
154 | |||
155 | /* must be called with qgroup_lock held */ | ||
156 | static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) | ||
157 | { | ||
158 | struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid); | ||
159 | struct btrfs_qgroup_list *list; | ||
160 | |||
161 | if (!qgroup) | ||
162 | return -ENOENT; | ||
163 | |||
164 | rb_erase(&qgroup->node, &fs_info->qgroup_tree); | ||
165 | list_del(&qgroup->dirty); | ||
166 | |||
167 | while (!list_empty(&qgroup->groups)) { | ||
168 | list = list_first_entry(&qgroup->groups, | ||
169 | struct btrfs_qgroup_list, next_group); | ||
170 | list_del(&list->next_group); | ||
171 | list_del(&list->next_member); | ||
172 | kfree(list); | ||
173 | } | ||
174 | |||
175 | while (!list_empty(&qgroup->members)) { | ||
176 | list = list_first_entry(&qgroup->members, | ||
177 | struct btrfs_qgroup_list, next_member); | ||
178 | list_del(&list->next_group); | ||
179 | list_del(&list->next_member); | ||
180 | kfree(list); | ||
181 | } | ||
182 | kfree(qgroup); | ||
183 | |||
184 | return 0; | ||
185 | } | ||
186 | |||
187 | /* must be called with qgroup_lock held */ | ||
188 | static int add_relation_rb(struct btrfs_fs_info *fs_info, | ||
189 | u64 memberid, u64 parentid) | ||
190 | { | ||
191 | struct btrfs_qgroup *member; | ||
192 | struct btrfs_qgroup *parent; | ||
193 | struct btrfs_qgroup_list *list; | ||
194 | |||
195 | member = find_qgroup_rb(fs_info, memberid); | ||
196 | parent = find_qgroup_rb(fs_info, parentid); | ||
197 | if (!member || !parent) | ||
198 | return -ENOENT; | ||
199 | |||
200 | list = kzalloc(sizeof(*list), GFP_ATOMIC); | ||
201 | if (!list) | ||
202 | return -ENOMEM; | ||
203 | |||
204 | list->group = parent; | ||
205 | list->member = member; | ||
206 | list_add_tail(&list->next_group, &member->groups); | ||
207 | list_add_tail(&list->next_member, &parent->members); | ||
208 | |||
209 | return 0; | ||
210 | } | ||
211 | |||
212 | /* must be called with qgroup_lock held */ | ||
213 | static int del_relation_rb(struct btrfs_fs_info *fs_info, | ||
214 | u64 memberid, u64 parentid) | ||
215 | { | ||
216 | struct btrfs_qgroup *member; | ||
217 | struct btrfs_qgroup *parent; | ||
218 | struct btrfs_qgroup_list *list; | ||
219 | |||
220 | member = find_qgroup_rb(fs_info, memberid); | ||
221 | parent = find_qgroup_rb(fs_info, parentid); | ||
222 | if (!member || !parent) | ||
223 | return -ENOENT; | ||
224 | |||
225 | list_for_each_entry(list, &member->groups, next_group) { | ||
226 | if (list->group == parent) { | ||
227 | list_del(&list->next_group); | ||
228 | list_del(&list->next_member); | ||
229 | kfree(list); | ||
230 | return 0; | ||
231 | } | ||
232 | } | ||
233 | return -ENOENT; | ||
234 | } | ||
235 | |||
236 | /* | ||
237 | * The full config is read in one go, only called from open_ctree() | ||
238 | * It doesn't use any locking, as at this point we're still single-threaded | ||
239 | */ | ||
240 | int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) | ||
241 | { | ||
242 | struct btrfs_key key; | ||
243 | struct btrfs_key found_key; | ||
244 | struct btrfs_root *quota_root = fs_info->quota_root; | ||
245 | struct btrfs_path *path = NULL; | ||
246 | struct extent_buffer *l; | ||
247 | int slot; | ||
248 | int ret = 0; | ||
249 | u64 flags = 0; | ||
250 | |||
251 | if (!fs_info->quota_enabled) | ||
252 | return 0; | ||
253 | |||
254 | path = btrfs_alloc_path(); | ||
255 | if (!path) { | ||
256 | ret = -ENOMEM; | ||
257 | goto out; | ||
258 | } | ||
259 | |||
260 | /* default this to quota off, in case no status key is found */ | ||
261 | fs_info->qgroup_flags = 0; | ||
262 | |||
263 | /* | ||
264 | * pass 1: read status, all qgroup infos and limits | ||
265 | */ | ||
266 | key.objectid = 0; | ||
267 | key.type = 0; | ||
268 | key.offset = 0; | ||
269 | ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1); | ||
270 | if (ret) | ||
271 | goto out; | ||
272 | |||
273 | while (1) { | ||
274 | struct btrfs_qgroup *qgroup; | ||
275 | |||
276 | slot = path->slots[0]; | ||
277 | l = path->nodes[0]; | ||
278 | btrfs_item_key_to_cpu(l, &found_key, slot); | ||
279 | |||
280 | if (found_key.type == BTRFS_QGROUP_STATUS_KEY) { | ||
281 | struct btrfs_qgroup_status_item *ptr; | ||
282 | |||
283 | ptr = btrfs_item_ptr(l, slot, | ||
284 | struct btrfs_qgroup_status_item); | ||
285 | |||
286 | if (btrfs_qgroup_status_version(l, ptr) != | ||
287 | BTRFS_QGROUP_STATUS_VERSION) { | ||
288 | printk(KERN_ERR | ||
289 | "btrfs: old qgroup version, quota disabled\n"); | ||
290 | goto out; | ||
291 | } | ||
292 | if (btrfs_qgroup_status_generation(l, ptr) != | ||
293 | fs_info->generation) { | ||
294 | flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; | ||
295 | printk(KERN_ERR | ||
296 | "btrfs: qgroup generation mismatch, " | ||
297 | "marked as inconsistent\n"); | ||
298 | } | ||
299 | fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, | ||
300 | ptr); | ||
301 | /* FIXME read scan element */ | ||
302 | goto next1; | ||
303 | } | ||
304 | |||
305 | if (found_key.type != BTRFS_QGROUP_INFO_KEY && | ||
306 | found_key.type != BTRFS_QGROUP_LIMIT_KEY) | ||
307 | goto next1; | ||
308 | |||
309 | qgroup = find_qgroup_rb(fs_info, found_key.offset); | ||
310 | if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) || | ||
311 | (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) { | ||
312 | printk(KERN_ERR "btrfs: inconsistent qgroup config\n"); | ||
313 | flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; | ||
314 | } | ||
315 | if (!qgroup) { | ||
316 | qgroup = add_qgroup_rb(fs_info, found_key.offset); | ||
317 | if (IS_ERR(qgroup)) { | ||
318 | ret = PTR_ERR(qgroup); | ||
319 | goto out; | ||
320 | } | ||
321 | } | ||
322 | switch (found_key.type) { | ||
323 | case BTRFS_QGROUP_INFO_KEY: { | ||
324 | struct btrfs_qgroup_info_item *ptr; | ||
325 | |||
326 | ptr = btrfs_item_ptr(l, slot, | ||
327 | struct btrfs_qgroup_info_item); | ||
328 | qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr); | ||
329 | qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr); | ||
330 | qgroup->excl = btrfs_qgroup_info_excl(l, ptr); | ||
331 | qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr); | ||
332 | /* generation currently unused */ | ||
333 | break; | ||
334 | } | ||
335 | case BTRFS_QGROUP_LIMIT_KEY: { | ||
336 | struct btrfs_qgroup_limit_item *ptr; | ||
337 | |||
338 | ptr = btrfs_item_ptr(l, slot, | ||
339 | struct btrfs_qgroup_limit_item); | ||
340 | qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr); | ||
341 | qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr); | ||
342 | qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr); | ||
343 | qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr); | ||
344 | qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr); | ||
345 | break; | ||
346 | } | ||
347 | } | ||
348 | next1: | ||
349 | ret = btrfs_next_item(quota_root, path); | ||
350 | if (ret < 0) | ||
351 | goto out; | ||
352 | if (ret) | ||
353 | break; | ||
354 | } | ||
355 | btrfs_release_path(path); | ||
356 | |||
357 | /* | ||
358 | * pass 2: read all qgroup relations | ||
359 | */ | ||
360 | key.objectid = 0; | ||
361 | key.type = BTRFS_QGROUP_RELATION_KEY; | ||
362 | key.offset = 0; | ||
363 | ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0); | ||
364 | if (ret) | ||
365 | goto out; | ||
366 | while (1) { | ||
367 | slot = path->slots[0]; | ||
368 | l = path->nodes[0]; | ||
369 | btrfs_item_key_to_cpu(l, &found_key, slot); | ||
370 | |||
371 | if (found_key.type != BTRFS_QGROUP_RELATION_KEY) | ||
372 | goto next2; | ||
373 | |||
374 | if (found_key.objectid > found_key.offset) { | ||
375 | /* parent <- member, not needed to build config */ | ||
376 | /* FIXME should we omit the key completely? */ | ||
377 | goto next2; | ||
378 | } | ||
379 | |||
380 | ret = add_relation_rb(fs_info, found_key.objectid, | ||
381 | found_key.offset); | ||
382 | if (ret) | ||
383 | goto out; | ||
384 | next2: | ||
385 | ret = btrfs_next_item(quota_root, path); | ||
386 | if (ret < 0) | ||
387 | goto out; | ||
388 | if (ret) | ||
389 | break; | ||
390 | } | ||
391 | out: | ||
392 | fs_info->qgroup_flags |= flags; | ||
393 | if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) { | ||
394 | fs_info->quota_enabled = 0; | ||
395 | fs_info->pending_quota_state = 0; | ||
396 | } | ||
397 | btrfs_free_path(path); | ||
398 | |||
399 | return ret < 0 ? ret : 0; | ||
400 | } | ||
401 | |||
402 | /* | ||
403 | * This is only called from close_ctree() or open_ctree(), both in single- | ||
404 | * threaded paths. Clean up the in-memory structures. No locking needed. | ||
405 | */ | ||
406 | void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) | ||
407 | { | ||
408 | struct rb_node *n; | ||
409 | struct btrfs_qgroup *qgroup; | ||
410 | struct btrfs_qgroup_list *list; | ||
411 | |||
412 | while ((n = rb_first(&fs_info->qgroup_tree))) { | ||
413 | qgroup = rb_entry(n, struct btrfs_qgroup, node); | ||
414 | rb_erase(n, &fs_info->qgroup_tree); | ||
415 | |||
416 | WARN_ON(!list_empty(&qgroup->dirty)); | ||
417 | |||
418 | while (!list_empty(&qgroup->groups)) { | ||
419 | list = list_first_entry(&qgroup->groups, | ||
420 | struct btrfs_qgroup_list, | ||
421 | next_group); | ||
422 | list_del(&list->next_group); | ||
423 | list_del(&list->next_member); | ||
424 | kfree(list); | ||
425 | } | ||
426 | |||
427 | while (!list_empty(&qgroup->members)) { | ||
428 | list = list_first_entry(&qgroup->members, | ||
429 | struct btrfs_qgroup_list, | ||
430 | next_member); | ||
431 | list_del(&list->next_group); | ||
432 | list_del(&list->next_member); | ||
433 | kfree(list); | ||
434 | } | ||
435 | kfree(qgroup); | ||
436 | } | ||
437 | } | ||
438 | |||
439 | static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, | ||
440 | struct btrfs_root *quota_root, | ||
441 | u64 src, u64 dst) | ||
442 | { | ||
443 | int ret; | ||
444 | struct btrfs_path *path; | ||
445 | struct btrfs_key key; | ||
446 | |||
447 | path = btrfs_alloc_path(); | ||
448 | if (!path) | ||
449 | return -ENOMEM; | ||
450 | |||
451 | key.objectid = src; | ||
452 | key.type = BTRFS_QGROUP_RELATION_KEY; | ||
453 | key.offset = dst; | ||
454 | |||
455 | ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0); | ||
456 | |||
457 | btrfs_mark_buffer_dirty(path->nodes[0]); | ||
458 | |||
459 | btrfs_free_path(path); | ||
460 | return ret; | ||
461 | } | ||
462 | |||
463 | static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, | ||
464 | struct btrfs_root *quota_root, | ||
465 | u64 src, u64 dst) | ||
466 | { | ||
467 | int ret; | ||
468 | struct btrfs_path *path; | ||
469 | struct btrfs_key key; | ||
470 | |||
471 | path = btrfs_alloc_path(); | ||
472 | if (!path) | ||
473 | return -ENOMEM; | ||
474 | |||
475 | key.objectid = src; | ||
476 | key.type = BTRFS_QGROUP_RELATION_KEY; | ||
477 | key.offset = dst; | ||
478 | |||
479 | ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); | ||
480 | if (ret < 0) | ||
481 | goto out; | ||
482 | |||
483 | if (ret > 0) { | ||
484 | ret = -ENOENT; | ||
485 | goto out; | ||
486 | } | ||
487 | |||
488 | ret = btrfs_del_item(trans, quota_root, path); | ||
489 | out: | ||
490 | btrfs_free_path(path); | ||
491 | return ret; | ||
492 | } | ||
493 | |||
494 | static int add_qgroup_item(struct btrfs_trans_handle *trans, | ||
495 | struct btrfs_root *quota_root, u64 qgroupid) | ||
496 | { | ||
497 | int ret; | ||
498 | struct btrfs_path *path; | ||
499 | struct btrfs_qgroup_info_item *qgroup_info; | ||
500 | struct btrfs_qgroup_limit_item *qgroup_limit; | ||
501 | struct extent_buffer *leaf; | ||
502 | struct btrfs_key key; | ||
503 | |||
504 | path = btrfs_alloc_path(); | ||
505 | if (!path) | ||
506 | return -ENOMEM; | ||
507 | |||
508 | key.objectid = 0; | ||
509 | key.type = BTRFS_QGROUP_INFO_KEY; | ||
510 | key.offset = qgroupid; | ||
511 | |||
512 | ret = btrfs_insert_empty_item(trans, quota_root, path, &key, | ||
513 | sizeof(*qgroup_info)); | ||
514 | if (ret) | ||
515 | goto out; | ||
516 | |||
517 | leaf = path->nodes[0]; | ||
518 | qgroup_info = btrfs_item_ptr(leaf, path->slots[0], | ||
519 | struct btrfs_qgroup_info_item); | ||
520 | btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid); | ||
521 | btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0); | ||
522 | btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0); | ||
523 | btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0); | ||
524 | btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0); | ||
525 | |||
526 | btrfs_mark_buffer_dirty(leaf); | ||
527 | |||
528 | btrfs_release_path(path); | ||
529 | |||
530 | key.type = BTRFS_QGROUP_LIMIT_KEY; | ||
531 | ret = btrfs_insert_empty_item(trans, quota_root, path, &key, | ||
532 | sizeof(*qgroup_limit)); | ||
533 | if (ret) | ||
534 | goto out; | ||
535 | |||
536 | leaf = path->nodes[0]; | ||
537 | qgroup_limit = btrfs_item_ptr(leaf, path->slots[0], | ||
538 | struct btrfs_qgroup_limit_item); | ||
539 | btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0); | ||
540 | btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0); | ||
541 | btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0); | ||
542 | btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0); | ||
543 | btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0); | ||
544 | |||
545 | btrfs_mark_buffer_dirty(leaf); | ||
546 | |||
547 | ret = 0; | ||
548 | out: | ||
549 | btrfs_free_path(path); | ||
550 | return ret; | ||
551 | } | ||
552 | |||
553 | static int del_qgroup_item(struct btrfs_trans_handle *trans, | ||
554 | struct btrfs_root *quota_root, u64 qgroupid) | ||
555 | { | ||
556 | int ret; | ||
557 | struct btrfs_path *path; | ||
558 | struct btrfs_key key; | ||
559 | |||
560 | path = btrfs_alloc_path(); | ||
561 | if (!path) | ||
562 | return -ENOMEM; | ||
563 | |||
564 | key.objectid = 0; | ||
565 | key.type = BTRFS_QGROUP_INFO_KEY; | ||
566 | key.offset = qgroupid; | ||
567 | ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); | ||
568 | if (ret < 0) | ||
569 | goto out; | ||
570 | |||
571 | if (ret > 0) { | ||
572 | ret = -ENOENT; | ||
573 | goto out; | ||
574 | } | ||
575 | |||
576 | ret = btrfs_del_item(trans, quota_root, path); | ||
577 | if (ret) | ||
578 | goto out; | ||
579 | |||
580 | btrfs_release_path(path); | ||
581 | |||
582 | key.type = BTRFS_QGROUP_LIMIT_KEY; | ||
583 | ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); | ||
584 | if (ret < 0) | ||
585 | goto out; | ||
586 | |||
587 | if (ret > 0) { | ||
588 | ret = -ENOENT; | ||
589 | goto out; | ||
590 | } | ||
591 | |||
592 | ret = btrfs_del_item(trans, quota_root, path); | ||
593 | |||
594 | out: | ||
595 | btrfs_free_path(path); | ||
596 | return ret; | ||
597 | } | ||
598 | |||
599 | static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, | ||
600 | struct btrfs_root *root, u64 qgroupid, | ||
601 | u64 flags, u64 max_rfer, u64 max_excl, | ||
602 | u64 rsv_rfer, u64 rsv_excl) | ||
603 | { | ||
604 | struct btrfs_path *path; | ||
605 | struct btrfs_key key; | ||
606 | struct extent_buffer *l; | ||
607 | struct btrfs_qgroup_limit_item *qgroup_limit; | ||
608 | int ret; | ||
609 | int slot; | ||
610 | |||
611 | key.objectid = 0; | ||
612 | key.type = BTRFS_QGROUP_LIMIT_KEY; | ||
613 | key.offset = qgroupid; | ||
614 | |||
615 | path = btrfs_alloc_path(); | ||
616 | BUG_ON(!path); | ||
617 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | ||
618 | if (ret > 0) | ||
619 | ret = -ENOENT; | ||
620 | |||
621 | if (ret) | ||
622 | goto out; | ||
623 | |||
624 | l = path->nodes[0]; | ||
625 | slot = path->slots[0]; | ||
626 | qgroup_limit = btrfs_item_ptr(l, path->slots[0], | ||
627 | struct btrfs_qgroup_limit_item); | ||
628 | btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags); | ||
629 | btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer); | ||
630 | btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl); | ||
631 | btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer); | ||
632 | btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl); | ||
633 | |||
634 | btrfs_mark_buffer_dirty(l); | ||
635 | |||
636 | out: | ||
637 | btrfs_free_path(path); | ||
638 | return ret; | ||
639 | } | ||
640 | |||
641 | static int update_qgroup_info_item(struct btrfs_trans_handle *trans, | ||
642 | struct btrfs_root *root, | ||
643 | struct btrfs_qgroup *qgroup) | ||
644 | { | ||
645 | struct btrfs_path *path; | ||
646 | struct btrfs_key key; | ||
647 | struct extent_buffer *l; | ||
648 | struct btrfs_qgroup_info_item *qgroup_info; | ||
649 | int ret; | ||
650 | int slot; | ||
651 | |||
652 | key.objectid = 0; | ||
653 | key.type = BTRFS_QGROUP_INFO_KEY; | ||
654 | key.offset = qgroup->qgroupid; | ||
655 | |||
656 | path = btrfs_alloc_path(); | ||
657 | BUG_ON(!path); | ||
658 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | ||
659 | if (ret > 0) | ||
660 | ret = -ENOENT; | ||
661 | |||
662 | if (ret) | ||
663 | goto out; | ||
664 | |||
665 | l = path->nodes[0]; | ||
666 | slot = path->slots[0]; | ||
667 | qgroup_info = btrfs_item_ptr(l, path->slots[0], | ||
668 | struct btrfs_qgroup_info_item); | ||
669 | btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid); | ||
670 | btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer); | ||
671 | btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr); | ||
672 | btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl); | ||
673 | btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr); | ||
674 | |||
675 | btrfs_mark_buffer_dirty(l); | ||
676 | |||
677 | out: | ||
678 | btrfs_free_path(path); | ||
679 | return ret; | ||
680 | } | ||
681 | |||
682 | static int update_qgroup_status_item(struct btrfs_trans_handle *trans, | ||
683 | struct btrfs_fs_info *fs_info, | ||
684 | struct btrfs_root *root) | ||
685 | { | ||
686 | struct btrfs_path *path; | ||
687 | struct btrfs_key key; | ||
688 | struct extent_buffer *l; | ||
689 | struct btrfs_qgroup_status_item *ptr; | ||
690 | int ret; | ||
691 | int slot; | ||
692 | |||
693 | key.objectid = 0; | ||
694 | key.type = BTRFS_QGROUP_STATUS_KEY; | ||
695 | key.offset = 0; | ||
696 | |||
697 | path = btrfs_alloc_path(); | ||
698 | BUG_ON(!path); | ||
699 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | ||
700 | if (ret > 0) | ||
701 | ret = -ENOENT; | ||
702 | |||
703 | if (ret) | ||
704 | goto out; | ||
705 | |||
706 | l = path->nodes[0]; | ||
707 | slot = path->slots[0]; | ||
708 | ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item); | ||
709 | btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags); | ||
710 | btrfs_set_qgroup_status_generation(l, ptr, trans->transid); | ||
711 | /* XXX scan */ | ||
712 | |||
713 | btrfs_mark_buffer_dirty(l); | ||
714 | |||
715 | out: | ||
716 | btrfs_free_path(path); | ||
717 | return ret; | ||
718 | } | ||
719 | |||
720 | /* | ||
721 | * called with qgroup_lock held | ||
722 | */ | ||
723 | static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans, | ||
724 | struct btrfs_root *root) | ||
725 | { | ||
726 | struct btrfs_path *path; | ||
727 | struct btrfs_key key; | ||
728 | int ret; | ||
729 | |||
730 | if (!root) | ||
731 | return -EINVAL; | ||
732 | |||
733 | path = btrfs_alloc_path(); | ||
734 | if (!path) | ||
735 | return -ENOMEM; | ||
736 | |||
737 | while (1) { | ||
738 | key.objectid = 0; | ||
739 | key.offset = 0; | ||
740 | key.type = 0; | ||
741 | |||
742 | path->leave_spinning = 1; | ||
743 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | ||
744 | if (ret > 0) { | ||
745 | if (path->slots[0] == 0) | ||
746 | break; | ||
747 | path->slots[0]--; | ||
748 | } else if (ret < 0) { | ||
749 | break; | ||
750 | } | ||
751 | |||
752 | ret = btrfs_del_item(trans, root, path); | ||
753 | if (ret) | ||
754 | goto out; | ||
755 | btrfs_release_path(path); | ||
756 | } | ||
757 | ret = 0; | ||
758 | out: | ||
759 | root->fs_info->pending_quota_state = 0; | ||
760 | btrfs_free_path(path); | ||
761 | return ret; | ||
762 | } | ||
763 | |||
764 | int btrfs_quota_enable(struct btrfs_trans_handle *trans, | ||
765 | struct btrfs_fs_info *fs_info) | ||
766 | { | ||
767 | struct btrfs_root *quota_root; | ||
768 | struct btrfs_path *path = NULL; | ||
769 | struct btrfs_qgroup_status_item *ptr; | ||
770 | struct extent_buffer *leaf; | ||
771 | struct btrfs_key key; | ||
772 | int ret = 0; | ||
773 | |||
774 | spin_lock(&fs_info->qgroup_lock); | ||
775 | if (fs_info->quota_root) { | ||
776 | fs_info->pending_quota_state = 1; | ||
777 | spin_unlock(&fs_info->qgroup_lock); | ||
778 | goto out; | ||
779 | } | ||
780 | spin_unlock(&fs_info->qgroup_lock); | ||
781 | |||
782 | /* | ||
783 | * initially create the quota tree | ||
784 | */ | ||
785 | quota_root = btrfs_create_tree(trans, fs_info, | ||
786 | BTRFS_QUOTA_TREE_OBJECTID); | ||
787 | if (IS_ERR(quota_root)) { | ||
788 | ret = PTR_ERR(quota_root); | ||
789 | goto out; | ||
790 | } | ||
791 | |||
792 | path = btrfs_alloc_path(); | ||
793 | if (!path) | ||
794 | return -ENOMEM; | ||
795 | |||
796 | key.objectid = 0; | ||
797 | key.type = BTRFS_QGROUP_STATUS_KEY; | ||
798 | key.offset = 0; | ||
799 | |||
800 | ret = btrfs_insert_empty_item(trans, quota_root, path, &key, | ||
801 | sizeof(*ptr)); | ||
802 | if (ret) | ||
803 | goto out; | ||
804 | |||
805 | leaf = path->nodes[0]; | ||
806 | ptr = btrfs_item_ptr(leaf, path->slots[0], | ||
807 | struct btrfs_qgroup_status_item); | ||
808 | btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid); | ||
809 | btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION); | ||
810 | fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON | | ||
811 | BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; | ||
812 | btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags); | ||
813 | btrfs_set_qgroup_status_scan(leaf, ptr, 0); | ||
814 | |||
815 | btrfs_mark_buffer_dirty(leaf); | ||
816 | |||
817 | spin_lock(&fs_info->qgroup_lock); | ||
818 | fs_info->quota_root = quota_root; | ||
819 | fs_info->pending_quota_state = 1; | ||
820 | spin_unlock(&fs_info->qgroup_lock); | ||
821 | out: | ||
822 | btrfs_free_path(path); | ||
823 | return ret; | ||
824 | } | ||
825 | |||
/*
 * Disable quota accounting: unpublish the quota root from fs_info, free
 * all in-memory qgroup state, then delete the on-disk quota tree and
 * free its root.  Returns -EINVAL if quota wasn't enabled.
 */
int btrfs_quota_disable(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *quota_root;
	int ret = 0;

	/*
	 * Detach the quota root and drop the in-memory config under the
	 * lock so concurrent accounting sees a consistent "off" state;
	 * the sleeping tree operations below run without the lock.
	 */
	spin_lock(&fs_info->qgroup_lock);
	fs_info->quota_enabled = 0;
	fs_info->pending_quota_state = 0;
	quota_root = fs_info->quota_root;
	fs_info->quota_root = NULL;
	btrfs_free_qgroup_config(fs_info);
	spin_unlock(&fs_info->qgroup_lock);

	if (!quota_root)
		return -EINVAL;

	/* empty the quota tree item by item */
	ret = btrfs_clean_quota_tree(trans, quota_root);
	if (ret)
		goto out;

	/* remove the quota tree's root item from the tree of tree roots */
	ret = btrfs_del_root(trans, tree_root, &quota_root->root_key);
	if (ret)
		goto out;

	list_del(&quota_root->dirty_list);

	/* scrub and free the (now empty) root node itself */
	btrfs_tree_lock(quota_root->node);
	clean_tree_block(trans, tree_root, quota_root->node);
	btrfs_tree_unlock(quota_root->node);
	btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);

	free_extent_buffer(quota_root->node);
	free_extent_buffer(quota_root->commit_root);
	kfree(quota_root);
out:
	return ret;
}
865 | |||
/*
 * Rescan all extents and rebuild qgroup accounting from scratch.
 * Not implemented yet; until it is, enabling quota leaves the
 * INCONSISTENT status flag set.  Returning 0 keeps the ioctl usable.
 */
int btrfs_quota_rescan(struct btrfs_fs_info *fs_info)
{
	/* FIXME */
	return 0;
}
871 | |||
872 | int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, | ||
873 | struct btrfs_fs_info *fs_info, u64 src, u64 dst) | ||
874 | { | ||
875 | struct btrfs_root *quota_root; | ||
876 | int ret = 0; | ||
877 | |||
878 | quota_root = fs_info->quota_root; | ||
879 | if (!quota_root) | ||
880 | return -EINVAL; | ||
881 | |||
882 | ret = add_qgroup_relation_item(trans, quota_root, src, dst); | ||
883 | if (ret) | ||
884 | return ret; | ||
885 | |||
886 | ret = add_qgroup_relation_item(trans, quota_root, dst, src); | ||
887 | if (ret) { | ||
888 | del_qgroup_relation_item(trans, quota_root, src, dst); | ||
889 | return ret; | ||
890 | } | ||
891 | |||
892 | spin_lock(&fs_info->qgroup_lock); | ||
893 | ret = add_relation_rb(quota_root->fs_info, src, dst); | ||
894 | spin_unlock(&fs_info->qgroup_lock); | ||
895 | |||
896 | return ret; | ||
897 | } | ||
898 | |||
899 | int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, | ||
900 | struct btrfs_fs_info *fs_info, u64 src, u64 dst) | ||
901 | { | ||
902 | struct btrfs_root *quota_root; | ||
903 | int ret = 0; | ||
904 | int err; | ||
905 | |||
906 | quota_root = fs_info->quota_root; | ||
907 | if (!quota_root) | ||
908 | return -EINVAL; | ||
909 | |||
910 | ret = del_qgroup_relation_item(trans, quota_root, src, dst); | ||
911 | err = del_qgroup_relation_item(trans, quota_root, dst, src); | ||
912 | if (err && !ret) | ||
913 | ret = err; | ||
914 | |||
915 | spin_lock(&fs_info->qgroup_lock); | ||
916 | del_relation_rb(fs_info, src, dst); | ||
917 | |||
918 | spin_unlock(&fs_info->qgroup_lock); | ||
919 | |||
920 | return ret; | ||
921 | } | ||
922 | |||
923 | int btrfs_create_qgroup(struct btrfs_trans_handle *trans, | ||
924 | struct btrfs_fs_info *fs_info, u64 qgroupid, char *name) | ||
925 | { | ||
926 | struct btrfs_root *quota_root; | ||
927 | struct btrfs_qgroup *qgroup; | ||
928 | int ret = 0; | ||
929 | |||
930 | quota_root = fs_info->quota_root; | ||
931 | if (!quota_root) | ||
932 | return -EINVAL; | ||
933 | |||
934 | ret = add_qgroup_item(trans, quota_root, qgroupid); | ||
935 | |||
936 | spin_lock(&fs_info->qgroup_lock); | ||
937 | qgroup = add_qgroup_rb(fs_info, qgroupid); | ||
938 | spin_unlock(&fs_info->qgroup_lock); | ||
939 | |||
940 | if (IS_ERR(qgroup)) | ||
941 | ret = PTR_ERR(qgroup); | ||
942 | |||
943 | return ret; | ||
944 | } | ||
945 | |||
946 | int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, | ||
947 | struct btrfs_fs_info *fs_info, u64 qgroupid) | ||
948 | { | ||
949 | struct btrfs_root *quota_root; | ||
950 | int ret = 0; | ||
951 | |||
952 | quota_root = fs_info->quota_root; | ||
953 | if (!quota_root) | ||
954 | return -EINVAL; | ||
955 | |||
956 | ret = del_qgroup_item(trans, quota_root, qgroupid); | ||
957 | |||
958 | spin_lock(&fs_info->qgroup_lock); | ||
959 | del_qgroup_rb(quota_root->fs_info, qgroupid); | ||
960 | |||
961 | spin_unlock(&fs_info->qgroup_lock); | ||
962 | |||
963 | return ret; | ||
964 | } | ||
965 | |||
966 | int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, | ||
967 | struct btrfs_fs_info *fs_info, u64 qgroupid, | ||
968 | struct btrfs_qgroup_limit *limit) | ||
969 | { | ||
970 | struct btrfs_root *quota_root = fs_info->quota_root; | ||
971 | struct btrfs_qgroup *qgroup; | ||
972 | int ret = 0; | ||
973 | |||
974 | if (!quota_root) | ||
975 | return -EINVAL; | ||
976 | |||
977 | ret = update_qgroup_limit_item(trans, quota_root, qgroupid, | ||
978 | limit->flags, limit->max_rfer, | ||
979 | limit->max_excl, limit->rsv_rfer, | ||
980 | limit->rsv_excl); | ||
981 | if (ret) { | ||
982 | fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; | ||
983 | printk(KERN_INFO "unable to update quota limit for %llu\n", | ||
984 | (unsigned long long)qgroupid); | ||
985 | } | ||
986 | |||
987 | spin_lock(&fs_info->qgroup_lock); | ||
988 | |||
989 | qgroup = find_qgroup_rb(fs_info, qgroupid); | ||
990 | if (!qgroup) { | ||
991 | ret = -ENOENT; | ||
992 | goto unlock; | ||
993 | } | ||
994 | qgroup->lim_flags = limit->flags; | ||
995 | qgroup->max_rfer = limit->max_rfer; | ||
996 | qgroup->max_excl = limit->max_excl; | ||
997 | qgroup->rsv_rfer = limit->rsv_rfer; | ||
998 | qgroup->rsv_excl = limit->rsv_excl; | ||
999 | |||
1000 | unlock: | ||
1001 | spin_unlock(&fs_info->qgroup_lock); | ||
1002 | |||
1003 | return ret; | ||
1004 | } | ||
1005 | |||
1006 | static void qgroup_dirty(struct btrfs_fs_info *fs_info, | ||
1007 | struct btrfs_qgroup *qgroup) | ||
1008 | { | ||
1009 | if (list_empty(&qgroup->dirty)) | ||
1010 | list_add(&qgroup->dirty, &fs_info->dirty_qgroups); | ||
1011 | } | ||
1012 | |||
1013 | /* | ||
1014 | * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts | ||
1015 | * the modification into a list that's later used by btrfs_end_transaction to | ||
1016 | * pass the recorded modifications on to btrfs_qgroup_account_ref. | ||
1017 | */ | ||
1018 | int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, | ||
1019 | struct btrfs_delayed_ref_node *node, | ||
1020 | struct btrfs_delayed_extent_op *extent_op) | ||
1021 | { | ||
1022 | struct qgroup_update *u; | ||
1023 | |||
1024 | BUG_ON(!trans->delayed_ref_elem.seq); | ||
1025 | u = kmalloc(sizeof(*u), GFP_NOFS); | ||
1026 | if (!u) | ||
1027 | return -ENOMEM; | ||
1028 | |||
1029 | u->node = node; | ||
1030 | u->extent_op = extent_op; | ||
1031 | list_add_tail(&u->list, &trans->qgroup_ref_list); | ||
1032 | |||
1033 | return 0; | ||
1034 | } | ||
1035 | |||
/*
 * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
 * from the fs. First, all roots referencing the extent are searched, and
 * then the space is accounted accordingly to the different roots. The
 * accounting algorithm works in 3 steps documented inline.
 */
int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
			     struct btrfs_fs_info *fs_info,
			     struct btrfs_delayed_ref_node *node,
			     struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_key ins;
	struct btrfs_root *quota_root;
	u64 ref_root;
	struct btrfs_qgroup *qgroup;
	struct ulist_node *unode;
	struct ulist *roots = NULL;
	struct ulist *tmp = NULL;
	struct ulist_iterator uiter;
	u64 seq;
	int ret = 0;
	int sgn;

	if (!fs_info->quota_enabled)
		return 0;

	BUG_ON(!fs_info->quota_root);

	ins.objectid = node->bytenr;
	ins.offset = node->num_bytes;
	ins.type = BTRFS_EXTENT_ITEM_KEY;

	/* extract the root this ref belongs to from the delayed-ref node */
	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
		struct btrfs_delayed_tree_ref *ref;
		ref = btrfs_delayed_node_to_tree_ref(node);
		ref_root = ref->root;
	} else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
		   node->type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_delayed_data_ref *ref;
		ref = btrfs_delayed_node_to_data_ref(node);
		ref_root = ref->root;
	} else {
		BUG();
	}

	if (!is_fstree(ref_root)) {
		/*
		 * non-fs-trees are not being accounted
		 */
		return 0;
	}

	/* sgn is the sign the extent's size is accounted with below */
	switch (node->action) {
	case BTRFS_ADD_DELAYED_REF:
	case BTRFS_ADD_DELAYED_EXTENT:
		sgn = 1;
		break;
	case BTRFS_DROP_DELAYED_REF:
		sgn = -1;
		break;
	case BTRFS_UPDATE_DELAYED_HEAD:
		return 0;
	default:
		BUG();
	}

	/*
	 * the delayed ref sequence number we pass depends on the direction of
	 * the operation. for add operations, we pass (node->seq - 1) to skip
	 * the delayed ref's current sequence number, because we need the state
	 * of the tree before the add operation. for delete operations, we pass
	 * (node->seq) to include the delayed ref's current sequence number,
	 * because we need the state of the tree after the delete operation.
	 */
	ret = btrfs_find_all_roots(trans, fs_info, node->bytenr,
				   sgn > 0 ? node->seq - 1 : node->seq, &roots);
	if (ret < 0)
		goto out;

	spin_lock(&fs_info->qgroup_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root)
		goto unlock;

	/* qgroup of the root the ref is added to / removed from */
	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto unlock;

	/*
	 * step 1: for each old ref, visit all nodes once and inc refcnt
	 *
	 * Each qgroup reachable (via the parent-group lists) from a root
	 * that already references the extent gets refcnt bumped relative
	 * to seq, so step 2/3 can tell how many of the old roots reach
	 * each qgroup.  seq is advanced past the maximum possible refcnt
	 * so stale refcnt/tag values from earlier calls can't collide.
	 */
	tmp = ulist_alloc(GFP_ATOMIC);
	if (!tmp) {
		ret = -ENOMEM;
		goto unlock;
	}
	seq = fs_info->qgroup_seq;
	fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(roots, &uiter))) {
		struct ulist_node *tmp_unode;
		struct ulist_iterator tmp_uiter;
		struct btrfs_qgroup *qg;

		qg = find_qgroup_rb(fs_info, unode->val);
		if (!qg)
			continue;

		ulist_reinit(tmp);
						/* XXX id not needed */
		ulist_add(tmp, qg->qgroupid, (unsigned long)qg, GFP_ATOMIC);
		ULIST_ITER_INIT(&tmp_uiter);
		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
			struct btrfs_qgroup_list *glist;

			qg = (struct btrfs_qgroup *)tmp_unode->aux;
			/* first visit in this round: reset refcnt to seq+1 */
			if (qg->refcnt < seq)
				qg->refcnt = seq + 1;
			else
				++qg->refcnt;

			/* walk up into all parent groups as well */
			list_for_each_entry(glist, &qg->groups, next_group) {
				ulist_add(tmp, glist->group->qgroupid,
					  (unsigned long)glist->group,
					  GFP_ATOMIC);
			}
		}
	}

	/*
	 * step 2: walk from the new root
	 *
	 * Qgroups reachable from the ref'd root that were NOT visited in
	 * step 1 gain (or lose, for drops) the referenced bytes; if no
	 * other root references the extent, exclusive bytes change too.
	 * Visited qgroups are tagged with seq so step 3 can skip them.
	 */
	ulist_reinit(tmp);
	ulist_add(tmp, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC);
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(tmp, &uiter))) {
		struct btrfs_qgroup *qg;
		struct btrfs_qgroup_list *glist;

		qg = (struct btrfs_qgroup *)unode->aux;
		if (qg->refcnt < seq) {
			/* not visited by step 1 */
			qg->rfer += sgn * node->num_bytes;
			qg->rfer_cmpr += sgn * node->num_bytes;
			if (roots->nnodes == 0) {
				qg->excl += sgn * node->num_bytes;
				qg->excl_cmpr += sgn * node->num_bytes;
			}
			qgroup_dirty(fs_info, qg);
		}
		WARN_ON(qg->tag >= seq);
		qg->tag = seq;

		list_for_each_entry(glist, &qg->groups, next_group) {
			ulist_add(tmp, glist->group->qgroupid,
				  (unsigned long)glist->group, GFP_ATOMIC);
		}
	}

	/*
	 * step 3: walk again from old refs
	 *
	 * A qgroup whose refcnt shows it is reached by ALL old roots,
	 * and which wasn't tagged in step 2, flips between shared and
	 * exclusive ownership of the extent: adjust its excl counters.
	 */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(roots, &uiter))) {
		struct btrfs_qgroup *qg;
		struct ulist_node *tmp_unode;
		struct ulist_iterator tmp_uiter;

		qg = find_qgroup_rb(fs_info, unode->val);
		if (!qg)
			continue;

		ulist_reinit(tmp);
		ulist_add(tmp, qg->qgroupid, (unsigned long)qg, GFP_ATOMIC);
		ULIST_ITER_INIT(&tmp_uiter);
		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
			struct btrfs_qgroup_list *glist;

			qg = (struct btrfs_qgroup *)tmp_unode->aux;
			/* already handled by step 2 */
			if (qg->tag == seq)
				continue;

			if (qg->refcnt - seq == roots->nnodes) {
				qg->excl -= sgn * node->num_bytes;
				qg->excl_cmpr -= sgn * node->num_bytes;
				qgroup_dirty(fs_info, qg);
			}

			list_for_each_entry(glist, &qg->groups, next_group) {
				ulist_add(tmp, glist->group->qgroupid,
					  (unsigned long)glist->group,
					  GFP_ATOMIC);
			}
		}
	}
	ret = 0;
unlock:
	spin_unlock(&fs_info->qgroup_lock);
out:
	ulist_free(roots);
	ulist_free(tmp);

	return ret;
}
1242 | |||
/*
 * called from commit_transaction. Writes all changed qgroups to disk.
 */
int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
		      struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *quota_root = fs_info->quota_root;
	int ret = 0;

	if (!quota_root)
		goto out;

	/* the pending enable/disable takes effect at commit time */
	fs_info->quota_enabled = fs_info->pending_quota_state;

	/*
	 * Drain the dirty list.  The lock is dropped around each
	 * update_qgroup_info_item() because it sleeps; the list_del_init
	 * under the lock keeps the entry from being processed twice.
	 */
	spin_lock(&fs_info->qgroup_lock);
	while (!list_empty(&fs_info->dirty_qgroups)) {
		struct btrfs_qgroup *qgroup;
		qgroup = list_first_entry(&fs_info->dirty_qgroups,
					  struct btrfs_qgroup, dirty);
		list_del_init(&qgroup->dirty);
		spin_unlock(&fs_info->qgroup_lock);
		ret = update_qgroup_info_item(trans, quota_root, qgroup);
		if (ret)
			fs_info->qgroup_flags |=
					BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		spin_lock(&fs_info->qgroup_lock);
	}
	if (fs_info->quota_enabled)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
	else
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
	spin_unlock(&fs_info->qgroup_lock);

	/* persist the (possibly inconsistent) status flags */
	ret = update_qgroup_status_item(trans, fs_info, quota_root);
	if (ret)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;

out:

	return ret;
}
1284 | |||
1285 | /* | ||
1286 | * copy the acounting information between qgroups. This is necessary when a | ||
1287 | * snapshot or a subvolume is created | ||
1288 | */ | ||
1289 | int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, | ||
1290 | struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, | ||
1291 | struct btrfs_qgroup_inherit *inherit) | ||
1292 | { | ||
1293 | int ret = 0; | ||
1294 | int i; | ||
1295 | u64 *i_qgroups; | ||
1296 | struct btrfs_root *quota_root = fs_info->quota_root; | ||
1297 | struct btrfs_qgroup *srcgroup; | ||
1298 | struct btrfs_qgroup *dstgroup; | ||
1299 | u32 level_size = 0; | ||
1300 | |||
1301 | if (!fs_info->quota_enabled) | ||
1302 | return 0; | ||
1303 | |||
1304 | if (!quota_root) | ||
1305 | return -EINVAL; | ||
1306 | |||
1307 | /* | ||
1308 | * create a tracking group for the subvol itself | ||
1309 | */ | ||
1310 | ret = add_qgroup_item(trans, quota_root, objectid); | ||
1311 | if (ret) | ||
1312 | goto out; | ||
1313 | |||
1314 | if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) { | ||
1315 | ret = update_qgroup_limit_item(trans, quota_root, objectid, | ||
1316 | inherit->lim.flags, | ||
1317 | inherit->lim.max_rfer, | ||
1318 | inherit->lim.max_excl, | ||
1319 | inherit->lim.rsv_rfer, | ||
1320 | inherit->lim.rsv_excl); | ||
1321 | if (ret) | ||
1322 | goto out; | ||
1323 | } | ||
1324 | |||
1325 | if (srcid) { | ||
1326 | struct btrfs_root *srcroot; | ||
1327 | struct btrfs_key srckey; | ||
1328 | int srcroot_level; | ||
1329 | |||
1330 | srckey.objectid = srcid; | ||
1331 | srckey.type = BTRFS_ROOT_ITEM_KEY; | ||
1332 | srckey.offset = (u64)-1; | ||
1333 | srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey); | ||
1334 | if (IS_ERR(srcroot)) { | ||
1335 | ret = PTR_ERR(srcroot); | ||
1336 | goto out; | ||
1337 | } | ||
1338 | |||
1339 | rcu_read_lock(); | ||
1340 | srcroot_level = btrfs_header_level(srcroot->node); | ||
1341 | level_size = btrfs_level_size(srcroot, srcroot_level); | ||
1342 | rcu_read_unlock(); | ||
1343 | } | ||
1344 | |||
1345 | /* | ||
1346 | * add qgroup to all inherited groups | ||
1347 | */ | ||
1348 | if (inherit) { | ||
1349 | i_qgroups = (u64 *)(inherit + 1); | ||
1350 | for (i = 0; i < inherit->num_qgroups; ++i) { | ||
1351 | ret = add_qgroup_relation_item(trans, quota_root, | ||
1352 | objectid, *i_qgroups); | ||
1353 | if (ret) | ||
1354 | goto out; | ||
1355 | ret = add_qgroup_relation_item(trans, quota_root, | ||
1356 | *i_qgroups, objectid); | ||
1357 | if (ret) | ||
1358 | goto out; | ||
1359 | ++i_qgroups; | ||
1360 | } | ||
1361 | } | ||
1362 | |||
1363 | |||
1364 | spin_lock(&fs_info->qgroup_lock); | ||
1365 | |||
1366 | dstgroup = add_qgroup_rb(fs_info, objectid); | ||
1367 | if (!dstgroup) | ||
1368 | goto unlock; | ||
1369 | |||
1370 | if (srcid) { | ||
1371 | srcgroup = find_qgroup_rb(fs_info, srcid); | ||
1372 | if (!srcgroup) | ||
1373 | goto unlock; | ||
1374 | dstgroup->rfer = srcgroup->rfer - level_size; | ||
1375 | dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size; | ||
1376 | srcgroup->excl = level_size; | ||
1377 | srcgroup->excl_cmpr = level_size; | ||
1378 | qgroup_dirty(fs_info, dstgroup); | ||
1379 | qgroup_dirty(fs_info, srcgroup); | ||
1380 | } | ||
1381 | |||
1382 | if (!inherit) | ||
1383 | goto unlock; | ||
1384 | |||
1385 | i_qgroups = (u64 *)(inherit + 1); | ||
1386 | for (i = 0; i < inherit->num_qgroups; ++i) { | ||
1387 | ret = add_relation_rb(quota_root->fs_info, objectid, | ||
1388 | *i_qgroups); | ||
1389 | if (ret) | ||
1390 | goto unlock; | ||
1391 | ++i_qgroups; | ||
1392 | } | ||
1393 | |||
1394 | for (i = 0; i < inherit->num_ref_copies; ++i) { | ||
1395 | struct btrfs_qgroup *src; | ||
1396 | struct btrfs_qgroup *dst; | ||
1397 | |||
1398 | src = find_qgroup_rb(fs_info, i_qgroups[0]); | ||
1399 | dst = find_qgroup_rb(fs_info, i_qgroups[1]); | ||
1400 | |||
1401 | if (!src || !dst) { | ||
1402 | ret = -EINVAL; | ||
1403 | goto unlock; | ||
1404 | } | ||
1405 | |||
1406 | dst->rfer = src->rfer - level_size; | ||
1407 | dst->rfer_cmpr = src->rfer_cmpr - level_size; | ||
1408 | i_qgroups += 2; | ||
1409 | } | ||
1410 | for (i = 0; i < inherit->num_excl_copies; ++i) { | ||
1411 | struct btrfs_qgroup *src; | ||
1412 | struct btrfs_qgroup *dst; | ||
1413 | |||
1414 | src = find_qgroup_rb(fs_info, i_qgroups[0]); | ||
1415 | dst = find_qgroup_rb(fs_info, i_qgroups[1]); | ||
1416 | |||
1417 | if (!src || !dst) { | ||
1418 | ret = -EINVAL; | ||
1419 | goto unlock; | ||
1420 | } | ||
1421 | |||
1422 | dst->excl = src->excl + level_size; | ||
1423 | dst->excl_cmpr = src->excl_cmpr + level_size; | ||
1424 | i_qgroups += 2; | ||
1425 | } | ||
1426 | |||
1427 | unlock: | ||
1428 | spin_unlock(&fs_info->qgroup_lock); | ||
1429 | out: | ||
1430 | return ret; | ||
1431 | } | ||
1432 | |||
1433 | /* | ||
1434 | * reserve some space for a qgroup and all its parents. The reservation takes | ||
1435 | * place with start_transaction or dealloc_reserve, similar to ENOSPC | ||
1436 | * accounting. If not enough space is available, EDQUOT is returned. | ||
1437 | * We assume that the requested space is new for all qgroups. | ||
1438 | */ | ||
1439 | int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | ||
1440 | { | ||
1441 | struct btrfs_root *quota_root; | ||
1442 | struct btrfs_qgroup *qgroup; | ||
1443 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1444 | u64 ref_root = root->root_key.objectid; | ||
1445 | int ret = 0; | ||
1446 | struct ulist *ulist = NULL; | ||
1447 | struct ulist_node *unode; | ||
1448 | struct ulist_iterator uiter; | ||
1449 | |||
1450 | if (!is_fstree(ref_root)) | ||
1451 | return 0; | ||
1452 | |||
1453 | if (num_bytes == 0) | ||
1454 | return 0; | ||
1455 | |||
1456 | spin_lock(&fs_info->qgroup_lock); | ||
1457 | quota_root = fs_info->quota_root; | ||
1458 | if (!quota_root) | ||
1459 | goto out; | ||
1460 | |||
1461 | qgroup = find_qgroup_rb(fs_info, ref_root); | ||
1462 | if (!qgroup) | ||
1463 | goto out; | ||
1464 | |||
1465 | /* | ||
1466 | * in a first step, we check all affected qgroups if any limits would | ||
1467 | * be exceeded | ||
1468 | */ | ||
1469 | ulist = ulist_alloc(GFP_ATOMIC); | ||
1470 | ulist_add(ulist, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC); | ||
1471 | ULIST_ITER_INIT(&uiter); | ||
1472 | while ((unode = ulist_next(ulist, &uiter))) { | ||
1473 | struct btrfs_qgroup *qg; | ||
1474 | struct btrfs_qgroup_list *glist; | ||
1475 | |||
1476 | qg = (struct btrfs_qgroup *)unode->aux; | ||
1477 | |||
1478 | if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && | ||
1479 | qg->reserved + qg->rfer + num_bytes > | ||
1480 | qg->max_rfer) | ||
1481 | ret = -EDQUOT; | ||
1482 | |||
1483 | if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) && | ||
1484 | qg->reserved + qg->excl + num_bytes > | ||
1485 | qg->max_excl) | ||
1486 | ret = -EDQUOT; | ||
1487 | |||
1488 | list_for_each_entry(glist, &qg->groups, next_group) { | ||
1489 | ulist_add(ulist, glist->group->qgroupid, | ||
1490 | (unsigned long)glist->group, GFP_ATOMIC); | ||
1491 | } | ||
1492 | } | ||
1493 | if (ret) | ||
1494 | goto out; | ||
1495 | |||
1496 | /* | ||
1497 | * no limits exceeded, now record the reservation into all qgroups | ||
1498 | */ | ||
1499 | ULIST_ITER_INIT(&uiter); | ||
1500 | while ((unode = ulist_next(ulist, &uiter))) { | ||
1501 | struct btrfs_qgroup *qg; | ||
1502 | |||
1503 | qg = (struct btrfs_qgroup *)unode->aux; | ||
1504 | |||
1505 | qg->reserved += num_bytes; | ||
1506 | } | ||
1507 | |||
1508 | out: | ||
1509 | spin_unlock(&fs_info->qgroup_lock); | ||
1510 | ulist_free(ulist); | ||
1511 | |||
1512 | return ret; | ||
1513 | } | ||
1514 | |||
1515 | void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) | ||
1516 | { | ||
1517 | struct btrfs_root *quota_root; | ||
1518 | struct btrfs_qgroup *qgroup; | ||
1519 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1520 | struct ulist *ulist = NULL; | ||
1521 | struct ulist_node *unode; | ||
1522 | struct ulist_iterator uiter; | ||
1523 | u64 ref_root = root->root_key.objectid; | ||
1524 | |||
1525 | if (!is_fstree(ref_root)) | ||
1526 | return; | ||
1527 | |||
1528 | if (num_bytes == 0) | ||
1529 | return; | ||
1530 | |||
1531 | spin_lock(&fs_info->qgroup_lock); | ||
1532 | |||
1533 | quota_root = fs_info->quota_root; | ||
1534 | if (!quota_root) | ||
1535 | goto out; | ||
1536 | |||
1537 | qgroup = find_qgroup_rb(fs_info, ref_root); | ||
1538 | if (!qgroup) | ||
1539 | goto out; | ||
1540 | |||
1541 | ulist = ulist_alloc(GFP_ATOMIC); | ||
1542 | ulist_add(ulist, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC); | ||
1543 | ULIST_ITER_INIT(&uiter); | ||
1544 | while ((unode = ulist_next(ulist, &uiter))) { | ||
1545 | struct btrfs_qgroup *qg; | ||
1546 | struct btrfs_qgroup_list *glist; | ||
1547 | |||
1548 | qg = (struct btrfs_qgroup *)unode->aux; | ||
1549 | |||
1550 | qg->reserved -= num_bytes; | ||
1551 | |||
1552 | list_for_each_entry(glist, &qg->groups, next_group) { | ||
1553 | ulist_add(ulist, glist->group->qgroupid, | ||
1554 | (unsigned long)glist->group, GFP_ATOMIC); | ||
1555 | } | ||
1556 | } | ||
1557 | |||
1558 | out: | ||
1559 | spin_unlock(&fs_info->qgroup_lock); | ||
1560 | ulist_free(ulist); | ||
1561 | } | ||
1562 | |||
1563 | void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) | ||
1564 | { | ||
1565 | if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq) | ||
1566 | return; | ||
1567 | printk(KERN_ERR "btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %llu\n", | ||
1568 | trans, list_empty(&trans->qgroup_ref_list) ? "" : " not", | ||
1569 | trans->delayed_ref_elem.seq); | ||
1570 | BUG(); | ||
1571 | } | ||
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 328b95f67660..cc20e95ea289 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -38,7 +38,6 @@ void put_transaction(struct btrfs_transaction *transaction) | |||
38 | if (atomic_dec_and_test(&transaction->use_count)) { | 38 | if (atomic_dec_and_test(&transaction->use_count)) { |
39 | BUG_ON(!list_empty(&transaction->list)); | 39 | BUG_ON(!list_empty(&transaction->list)); |
40 | WARN_ON(transaction->delayed_refs.root.rb_node); | 40 | WARN_ON(transaction->delayed_refs.root.rb_node); |
41 | WARN_ON(!list_empty(&transaction->delayed_refs.seq_head)); | ||
42 | memset(transaction, 0, sizeof(*transaction)); | 41 | memset(transaction, 0, sizeof(*transaction)); |
43 | kmem_cache_free(btrfs_transaction_cachep, transaction); | 42 | kmem_cache_free(btrfs_transaction_cachep, transaction); |
44 | } | 43 | } |
@@ -126,7 +125,6 @@ loop: | |||
126 | cur_trans->delayed_refs.num_heads = 0; | 125 | cur_trans->delayed_refs.num_heads = 0; |
127 | cur_trans->delayed_refs.flushing = 0; | 126 | cur_trans->delayed_refs.flushing = 0; |
128 | cur_trans->delayed_refs.run_delayed_start = 0; | 127 | cur_trans->delayed_refs.run_delayed_start = 0; |
129 | cur_trans->delayed_refs.seq = 1; | ||
130 | 128 | ||
131 | /* | 129 | /* |
132 | * although the tree mod log is per file system and not per transaction, | 130 | * although the tree mod log is per file system and not per transaction, |
@@ -145,10 +143,8 @@ loop: | |||
145 | } | 143 | } |
146 | atomic_set(&fs_info->tree_mod_seq, 0); | 144 | atomic_set(&fs_info->tree_mod_seq, 0); |
147 | 145 | ||
148 | init_waitqueue_head(&cur_trans->delayed_refs.seq_wait); | ||
149 | spin_lock_init(&cur_trans->commit_lock); | 146 | spin_lock_init(&cur_trans->commit_lock); |
150 | spin_lock_init(&cur_trans->delayed_refs.lock); | 147 | spin_lock_init(&cur_trans->delayed_refs.lock); |
151 | INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head); | ||
152 | 148 | ||
153 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | 149 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); |
154 | list_add_tail(&cur_trans->list, &fs_info->trans_list); | 150 | list_add_tail(&cur_trans->list, &fs_info->trans_list); |
@@ -299,6 +295,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
299 | struct btrfs_transaction *cur_trans; | 295 | struct btrfs_transaction *cur_trans; |
300 | u64 num_bytes = 0; | 296 | u64 num_bytes = 0; |
301 | int ret; | 297 | int ret; |
298 | u64 qgroup_reserved = 0; | ||
302 | 299 | ||
303 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | 300 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) |
304 | return ERR_PTR(-EROFS); | 301 | return ERR_PTR(-EROFS); |
@@ -317,6 +314,14 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
317 | * the appropriate flushing if need be. | 314 | * the appropriate flushing if need be. |
318 | */ | 315 | */ |
319 | if (num_items > 0 && root != root->fs_info->chunk_root) { | 316 | if (num_items > 0 && root != root->fs_info->chunk_root) { |
317 | if (root->fs_info->quota_enabled && | ||
318 | is_fstree(root->root_key.objectid)) { | ||
319 | qgroup_reserved = num_items * root->leafsize; | ||
320 | ret = btrfs_qgroup_reserve(root, qgroup_reserved); | ||
321 | if (ret) | ||
322 | return ERR_PTR(ret); | ||
323 | } | ||
324 | |||
320 | num_bytes = btrfs_calc_trans_metadata_size(root, num_items); | 325 | num_bytes = btrfs_calc_trans_metadata_size(root, num_items); |
321 | ret = btrfs_block_rsv_add(root, | 326 | ret = btrfs_block_rsv_add(root, |
322 | &root->fs_info->trans_block_rsv, | 327 | &root->fs_info->trans_block_rsv, |
@@ -349,12 +354,16 @@ again: | |||
349 | h->transaction = cur_trans; | 354 | h->transaction = cur_trans; |
350 | h->blocks_used = 0; | 355 | h->blocks_used = 0; |
351 | h->bytes_reserved = 0; | 356 | h->bytes_reserved = 0; |
357 | h->root = root; | ||
352 | h->delayed_ref_updates = 0; | 358 | h->delayed_ref_updates = 0; |
353 | h->use_count = 1; | 359 | h->use_count = 1; |
354 | h->adding_csums = 0; | 360 | h->adding_csums = 0; |
355 | h->block_rsv = NULL; | 361 | h->block_rsv = NULL; |
356 | h->orig_rsv = NULL; | 362 | h->orig_rsv = NULL; |
357 | h->aborted = 0; | 363 | h->aborted = 0; |
364 | h->qgroup_reserved = qgroup_reserved; | ||
365 | h->delayed_ref_elem.seq = 0; | ||
366 | INIT_LIST_HEAD(&h->qgroup_ref_list); | ||
358 | 367 | ||
359 | smp_mb(); | 368 | smp_mb(); |
360 | if (cur_trans->blocked && may_wait_transaction(root, type)) { | 369 | if (cur_trans->blocked && may_wait_transaction(root, type)) { |
@@ -505,6 +514,24 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
505 | return 0; | 514 | return 0; |
506 | } | 515 | } |
507 | 516 | ||
517 | /* | ||
518 | * do the qgroup accounting as early as possible | ||
519 | */ | ||
520 | err = btrfs_delayed_refs_qgroup_accounting(trans, info); | ||
521 | |||
522 | btrfs_trans_release_metadata(trans, root); | ||
523 | trans->block_rsv = NULL; | ||
524 | /* | ||
525 | * the same root has to be passed to start_transaction and | ||
526 | * end_transaction. Subvolume quota depends on this. | ||
527 | */ | ||
528 | WARN_ON(trans->root != root); | ||
529 | |||
530 | if (trans->qgroup_reserved) { | ||
531 | btrfs_qgroup_free(root, trans->qgroup_reserved); | ||
532 | trans->qgroup_reserved = 0; | ||
533 | } | ||
534 | |||
508 | while (count < 2) { | 535 | while (count < 2) { |
509 | unsigned long cur = trans->delayed_ref_updates; | 536 | unsigned long cur = trans->delayed_ref_updates; |
510 | trans->delayed_ref_updates = 0; | 537 | trans->delayed_ref_updates = 0; |
@@ -559,6 +586,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
559 | root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | 586 | root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { |
560 | err = -EIO; | 587 | err = -EIO; |
561 | } | 588 | } |
589 | assert_qgroups_uptodate(trans); | ||
562 | 590 | ||
563 | memset(trans, 0, sizeof(*trans)); | 591 | memset(trans, 0, sizeof(*trans)); |
564 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 592 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
@@ -777,6 +805,13 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, | |||
777 | ret = btrfs_run_dev_stats(trans, root->fs_info); | 805 | ret = btrfs_run_dev_stats(trans, root->fs_info); |
778 | BUG_ON(ret); | 806 | BUG_ON(ret); |
779 | 807 | ||
808 | ret = btrfs_run_qgroups(trans, root->fs_info); | ||
809 | BUG_ON(ret); | ||
810 | |||
811 | /* run_qgroups might have added some more refs */ | ||
812 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
813 | BUG_ON(ret); | ||
814 | |||
780 | while (!list_empty(&fs_info->dirty_cowonly_roots)) { | 815 | while (!list_empty(&fs_info->dirty_cowonly_roots)) { |
781 | next = fs_info->dirty_cowonly_roots.next; | 816 | next = fs_info->dirty_cowonly_roots.next; |
782 | list_del_init(next); | 817 | list_del_init(next); |
@@ -949,6 +984,14 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
949 | } | 984 | } |
950 | } | 985 | } |
951 | 986 | ||
987 | ret = btrfs_qgroup_inherit(trans, fs_info, root->root_key.objectid, | ||
988 | objectid, pending->inherit); | ||
989 | kfree(pending->inherit); | ||
990 | if (ret) { | ||
991 | pending->error = ret; | ||
992 | goto fail; | ||
993 | } | ||
994 | |||
952 | key.objectid = objectid; | 995 | key.objectid = objectid; |
953 | key.offset = (u64)-1; | 996 | key.offset = (u64)-1; |
954 | key.type = BTRFS_ROOT_ITEM_KEY; | 997 | key.type = BTRFS_ROOT_ITEM_KEY; |
@@ -1345,6 +1388,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1345 | goto cleanup_transaction; | 1388 | goto cleanup_transaction; |
1346 | 1389 | ||
1347 | /* | 1390 | /* |
1391 | * running the delayed items may have added new refs. account | ||
1392 | * them now so that they hinder processing of more delayed refs | ||
1393 | * as little as possible. | ||
1394 | */ | ||
1395 | btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); | ||
1396 | |||
1397 | /* | ||
1348 | * rename don't use btrfs_join_transaction, so, once we | 1398 | * rename don't use btrfs_join_transaction, so, once we |
1349 | * set the transaction to blocked above, we aren't going | 1399 | * set the transaction to blocked above, we aren't going |
1350 | * to get any new ordered operations. We can safely run | 1400 | * to get any new ordered operations. We can safely run |
@@ -1456,6 +1506,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1456 | root->fs_info->chunk_root->node); | 1506 | root->fs_info->chunk_root->node); |
1457 | switch_commit_root(root->fs_info->chunk_root); | 1507 | switch_commit_root(root->fs_info->chunk_root); |
1458 | 1508 | ||
1509 | assert_qgroups_uptodate(trans); | ||
1459 | update_super_roots(root); | 1510 | update_super_roots(root); |
1460 | 1511 | ||
1461 | if (!root->fs_info->log_root_recovering) { | 1512 | if (!root->fs_info->log_root_recovering) { |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index d314a74b4968..e8b8416c688b 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -20,6 +20,7 @@ | |||
20 | #define __BTRFS_TRANSACTION__ | 20 | #define __BTRFS_TRANSACTION__ |
21 | #include "btrfs_inode.h" | 21 | #include "btrfs_inode.h" |
22 | #include "delayed-ref.h" | 22 | #include "delayed-ref.h" |
23 | #include "ctree.h" | ||
23 | 24 | ||
24 | struct btrfs_transaction { | 25 | struct btrfs_transaction { |
25 | u64 transid; | 26 | u64 transid; |
@@ -49,6 +50,7 @@ struct btrfs_transaction { | |||
49 | struct btrfs_trans_handle { | 50 | struct btrfs_trans_handle { |
50 | u64 transid; | 51 | u64 transid; |
51 | u64 bytes_reserved; | 52 | u64 bytes_reserved; |
53 | u64 qgroup_reserved; | ||
52 | unsigned long use_count; | 54 | unsigned long use_count; |
53 | unsigned long blocks_reserved; | 55 | unsigned long blocks_reserved; |
54 | unsigned long blocks_used; | 56 | unsigned long blocks_used; |
@@ -58,12 +60,21 @@ struct btrfs_trans_handle { | |||
58 | struct btrfs_block_rsv *orig_rsv; | 60 | struct btrfs_block_rsv *orig_rsv; |
59 | int aborted; | 61 | int aborted; |
60 | int adding_csums; | 62 | int adding_csums; |
63 | /* | ||
64 | * this root is only needed to validate that the root passed to | ||
65 | * start_transaction is the same as the one passed to end_transaction. | ||
66 | * Subvolume quota depends on this | ||
67 | */ | ||
68 | struct btrfs_root *root; | ||
69 | struct seq_list delayed_ref_elem; | ||
70 | struct list_head qgroup_ref_list; | ||
61 | }; | 71 | }; |
62 | 72 | ||
63 | struct btrfs_pending_snapshot { | 73 | struct btrfs_pending_snapshot { |
64 | struct dentry *dentry; | 74 | struct dentry *dentry; |
65 | struct btrfs_root *root; | 75 | struct btrfs_root *root; |
66 | struct btrfs_root *snap; | 76 | struct btrfs_root *snap; |
77 | struct btrfs_qgroup_inherit *inherit; | ||
67 | /* block reservation for the operation */ | 78 | /* block reservation for the operation */ |
68 | struct btrfs_block_rsv block_rsv; | 79 | struct btrfs_block_rsv block_rsv; |
69 | /* extra metadata reseration for relocation */ | 80 | /* extra metadata reseration for relocation */ |