author		Chris Mason <chris.mason@fusionio.com>	2012-07-25 16:11:38 -0400
committer	Chris Mason <chris.mason@fusionio.com>	2012-07-25 16:11:38 -0400
commit		b478b2baa37ac99fc04a30809c780dd5dfd43595 (patch)
tree		bed7af1466e5b1e0b0501eba18f77c804a864d7d
parent		67c9684f48ea9cbc5e9b8a1feb3151800e9dcc22 (diff)
parent		6f72c7e20dbaea55f04546de69586c84a3654503 (diff)

Merge branch 'qgroup' of git://git.jan-o-sch.net/btrfs-unstable into for-linus

Conflicts:
	fs/btrfs/ioctl.c
	fs/btrfs/ioctl.h
	fs/btrfs/transaction.c
	fs/btrfs/transaction.h

Signed-off-by: Chris Mason <chris.mason@fusionio.com>
 fs/btrfs/Makefile      |    2
 fs/btrfs/backref.c     |   30
 fs/btrfs/backref.h     |    3
 fs/btrfs/ctree.c       |  347
 fs/btrfs/ctree.h       |  233
 fs/btrfs/delayed-ref.c |   56
 fs/btrfs/delayed-ref.h |   62
 fs/btrfs/disk-io.c     |  134
 fs/btrfs/disk-io.h     |    6
 fs/btrfs/extent-tree.c |  119
 fs/btrfs/ioctl.c       |  244
 fs/btrfs/ioctl.h       |   62
 fs/btrfs/qgroup.c      | 1571
 fs/btrfs/transaction.c |   59
 fs/btrfs/transaction.h |   11
 15 files changed, 2697 insertions(+), 242 deletions(-)
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 0c4fa2befae..0bc4d3a10a5 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -8,7 +8,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
 	   export.o tree-log.o free-space-cache.o zlib.o lzo.o \
 	   compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
-	   reada.o backref.o ulist.o
+	   reada.o backref.o ulist.o qgroup.o
 
 btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
 btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index a383c18e74e..7d80ddd8f54 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -773,9 +773,8 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
  */
 static int find_parent_nodes(struct btrfs_trans_handle *trans,
 			     struct btrfs_fs_info *fs_info, u64 bytenr,
-			     u64 delayed_ref_seq, u64 time_seq,
-			     struct ulist *refs, struct ulist *roots,
-			     const u64 *extent_item_pos)
+			     u64 time_seq, struct ulist *refs,
+			     struct ulist *roots, const u64 *extent_item_pos)
 {
 	struct btrfs_key key;
 	struct btrfs_path *path;
@@ -837,7 +836,7 @@ again:
 			btrfs_put_delayed_ref(&head->node);
 			goto again;
 		}
-		ret = __add_delayed_refs(head, delayed_ref_seq,
+		ret = __add_delayed_refs(head, time_seq,
 					 &prefs_delayed);
 		mutex_unlock(&head->mutex);
 		if (ret) {
@@ -981,8 +980,7 @@ static void free_leaf_list(struct ulist *blocks)
  */
 static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
 				struct btrfs_fs_info *fs_info, u64 bytenr,
-				u64 delayed_ref_seq, u64 time_seq,
-				struct ulist **leafs,
+				u64 time_seq, struct ulist **leafs,
 				const u64 *extent_item_pos)
 {
 	struct ulist *tmp;
@@ -997,7 +995,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
 		return -ENOMEM;
 	}
 
-	ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq,
+	ret = find_parent_nodes(trans, fs_info, bytenr,
 				time_seq, *leafs, tmp, extent_item_pos);
 	ulist_free(tmp);
 
@@ -1024,8 +1022,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
  */
 int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
 			 struct btrfs_fs_info *fs_info, u64 bytenr,
-			 u64 delayed_ref_seq, u64 time_seq,
-			 struct ulist **roots)
+			 u64 time_seq, struct ulist **roots)
 {
 	struct ulist *tmp;
 	struct ulist_node *node = NULL;
@@ -1043,7 +1040,7 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
 
 	ULIST_ITER_INIT(&uiter);
 	while (1) {
-		ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq,
+		ret = find_parent_nodes(trans, fs_info, bytenr,
 					time_seq, tmp, *roots, NULL);
 		if (ret < 0 && ret != -ENOENT) {
 			ulist_free(tmp);
@@ -1376,11 +1373,9 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 	struct ulist *roots = NULL;
 	struct ulist_node *ref_node = NULL;
 	struct ulist_node *root_node = NULL;
-	struct seq_list seq_elem = {};
 	struct seq_list tree_mod_seq_elem = {};
 	struct ulist_iterator ref_uiter;
 	struct ulist_iterator root_uiter;
-	struct btrfs_delayed_ref_root *delayed_refs = NULL;
 
 	pr_debug("resolving all inodes for extent %llu\n",
 		 extent_item_objectid);
@@ -1391,16 +1386,11 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 		trans = btrfs_join_transaction(fs_info->extent_root);
 		if (IS_ERR(trans))
 			return PTR_ERR(trans);
-
-		delayed_refs = &trans->transaction->delayed_refs;
-		spin_lock(&delayed_refs->lock);
-		btrfs_get_delayed_seq(delayed_refs, &seq_elem);
-		spin_unlock(&delayed_refs->lock);
 		btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
 	}
 
 	ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
-				   seq_elem.seq, tree_mod_seq_elem.seq, &refs,
+				   tree_mod_seq_elem.seq, &refs,
 				   &extent_item_pos);
 	if (ret)
 		goto out;
@@ -1408,8 +1398,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 	ULIST_ITER_INIT(&ref_uiter);
 	while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
 		ret = btrfs_find_all_roots(trans, fs_info, ref_node->val,
-					   seq_elem.seq,
-					   tree_mod_seq_elem.seq, &roots);
+					   tree_mod_seq_elem.seq, &roots);
 		if (ret)
 			break;
 		ULIST_ITER_INIT(&root_uiter);
@@ -1431,7 +1420,6 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 out:
 	if (!search_commit_root) {
 		btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
-		btrfs_put_delayed_seq(delayed_refs, &seq_elem);
 		btrfs_end_transaction(trans, fs_info->extent_root);
 	}
 
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index c18d8ac7b79..3a1ad3e2dcb 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -58,8 +58,7 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
 
 int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
 			 struct btrfs_fs_info *fs_info, u64 bytenr,
-			 u64 delayed_ref_seq, u64 time_seq,
-			 struct ulist **roots);
+			 u64 time_seq, struct ulist **roots);
 
 struct btrfs_data_container *init_data_container(u32 total_bytes);
 struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
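
Note: with the delayed_ref_seq argument gone, a backref walker blocks tree mod log pruning with a single seq_list element. A minimal sketch of a hypothetical caller (the example_* name and the trimmed error handling are illustrative; the pattern mirrors iterate_extent_inodes() in backref.c above):

static int example_roots_for_extent(struct btrfs_trans_handle *trans,
				    struct btrfs_fs_info *fs_info, u64 bytenr)
{
	struct seq_list elem = {};	/* elem.seq must start at 0, see ctree.c */
	struct ulist *roots = NULL;
	int ret;

	/* register a blocker so the tree mod log keeps our history around */
	btrfs_get_tree_mod_seq(fs_info, &elem);
	ret = btrfs_find_all_roots(trans, fs_info, bytenr, elem.seq, &roots);
	/* ... walk roots with ULIST_ITER_INIT()/ulist_next() ... */
	ulist_free(roots);
	btrfs_put_tree_mod_seq(fs_info, &elem);
	return ret;
}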
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 67fe46fdee6..fb21431fe4e 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -321,7 +321,7 @@ struct tree_mod_root {
 struct tree_mod_elem {
 	struct rb_node node;
 	u64 index;		/* shifted logical */
-	struct seq_list elem;
+	u64 seq;
 	enum mod_log_op op;
 
 	/* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */
@@ -341,20 +341,50 @@ struct tree_mod_elem {
 	struct tree_mod_root old_root;
 };
 
-static inline void
-__get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem)
+static inline void tree_mod_log_read_lock(struct btrfs_fs_info *fs_info)
 {
-	elem->seq = atomic_inc_return(&fs_info->tree_mod_seq);
-	list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
+	read_lock(&fs_info->tree_mod_log_lock);
 }
 
-void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
-			    struct seq_list *elem)
+static inline void tree_mod_log_read_unlock(struct btrfs_fs_info *fs_info)
+{
+	read_unlock(&fs_info->tree_mod_log_lock);
+}
+
+static inline void tree_mod_log_write_lock(struct btrfs_fs_info *fs_info)
+{
+	write_lock(&fs_info->tree_mod_log_lock);
+}
+
+static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
 {
-	elem->flags = 1;
+	write_unlock(&fs_info->tree_mod_log_lock);
+}
+
+/*
+ * This adds a new blocker to the tree mod log's blocker list if the @elem
+ * passed does not already have a sequence number set. So when a caller expects
+ * to record tree modifications, it should ensure to set elem->seq to zero
+ * before calling btrfs_get_tree_mod_seq.
+ * Returns a fresh, unused tree log modification sequence number, even if no new
+ * blocker was added.
+ */
+u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
+			   struct seq_list *elem)
+{
+	u64 seq;
+
+	tree_mod_log_write_lock(fs_info);
 	spin_lock(&fs_info->tree_mod_seq_lock);
-	__get_tree_mod_seq(fs_info, elem);
+	if (!elem->seq) {
+		elem->seq = btrfs_inc_tree_mod_seq(fs_info);
+		list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
+	}
+	seq = btrfs_inc_tree_mod_seq(fs_info);
 	spin_unlock(&fs_info->tree_mod_seq_lock);
+	tree_mod_log_write_unlock(fs_info);
+
+	return seq;
 }
 
 void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
@@ -371,41 +401,46 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
 	if (!seq_putting)
 		return;
 
-	BUG_ON(!(elem->flags & 1));
 	spin_lock(&fs_info->tree_mod_seq_lock);
 	list_del(&elem->list);
+	elem->seq = 0;
 
 	list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) {
-		if ((cur_elem->flags & 1) && cur_elem->seq < min_seq) {
+		if (cur_elem->seq < min_seq) {
 			if (seq_putting > cur_elem->seq) {
 				/*
 				 * blocker with lower sequence number exists, we
 				 * cannot remove anything from the log
 				 */
-				goto out;
+				spin_unlock(&fs_info->tree_mod_seq_lock);
+				return;
 			}
 			min_seq = cur_elem->seq;
 		}
 	}
+	spin_unlock(&fs_info->tree_mod_seq_lock);
+
+	/*
+	 * we removed the lowest blocker from the blocker list, so there may be
+	 * more processible delayed refs.
+	 */
+	wake_up(&fs_info->tree_mod_seq_wait);
 
 	/*
 	 * anything that's lower than the lowest existing (read: blocked)
 	 * sequence number can be removed from the tree.
 	 */
-	write_lock(&fs_info->tree_mod_log_lock);
+	tree_mod_log_write_lock(fs_info);
 	tm_root = &fs_info->tree_mod_log;
 	for (node = rb_first(tm_root); node; node = next) {
 		next = rb_next(node);
 		tm = container_of(node, struct tree_mod_elem, node);
-		if (tm->elem.seq > min_seq)
+		if (tm->seq > min_seq)
 			continue;
 		rb_erase(node, tm_root);
-		list_del(&tm->elem.list);
 		kfree(tm);
 	}
-	write_unlock(&fs_info->tree_mod_log_lock);
-out:
-	spin_unlock(&fs_info->tree_mod_seq_lock);
+	tree_mod_log_write_unlock(fs_info);
 }
 
 /*
@@ -423,11 +458,9 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
 	struct rb_node **new;
 	struct rb_node *parent = NULL;
 	struct tree_mod_elem *cur;
-	int ret = 0;
 
-	BUG_ON(!tm || !tm->elem.seq);
+	BUG_ON(!tm || !tm->seq);
 
-	write_lock(&fs_info->tree_mod_log_lock);
 	tm_root = &fs_info->tree_mod_log;
 	new = &tm_root->rb_node;
 	while (*new) {
@@ -437,88 +470,81 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
 			new = &((*new)->rb_left);
 		else if (cur->index > tm->index)
 			new = &((*new)->rb_right);
-		else if (cur->elem.seq < tm->elem.seq)
+		else if (cur->seq < tm->seq)
 			new = &((*new)->rb_left);
-		else if (cur->elem.seq > tm->elem.seq)
+		else if (cur->seq > tm->seq)
 			new = &((*new)->rb_right);
 		else {
 			kfree(tm);
-			ret = -EEXIST;
-			goto unlock;
+			return -EEXIST;
 		}
 	}
 
 	rb_link_node(&tm->node, parent, new);
 	rb_insert_color(&tm->node, tm_root);
-unlock:
-	write_unlock(&fs_info->tree_mod_log_lock);
-	return ret;
+	return 0;
 }
 
+/*
+ * Determines if logging can be omitted. Returns 1 if it can. Otherwise, it
+ * returns zero with the tree_mod_log_lock acquired. The caller must hold
+ * this until all tree mod log insertions are recorded in the rb tree and then
+ * call tree_mod_log_write_unlock() to release.
+ */
 static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
 				    struct extent_buffer *eb) {
 	smp_mb();
 	if (list_empty(&(fs_info)->tree_mod_seq_list))
 		return 1;
-	if (!eb)
-		return 0;
-	if (btrfs_header_level(eb) == 0)
+	if (eb && btrfs_header_level(eb) == 0)
+		return 1;
+
+	tree_mod_log_write_lock(fs_info);
+	if (list_empty(&fs_info->tree_mod_seq_list)) {
+		/*
+		 * someone emptied the list while we were waiting for the lock.
+		 * we must not add to the list when no blocker exists.
+		 */
+		tree_mod_log_write_unlock(fs_info);
 		return 1;
+	}
+
 	return 0;
 }
 
 /*
- * This allocates memory and gets a tree modification sequence number when
- * needed.
+ * This allocates memory and gets a tree modification sequence number.
  *
- * Returns 0 when no sequence number is needed, < 0 on error.
- * Returns 1 when a sequence number was added. In this case,
- * fs_info->tree_mod_seq_lock was acquired and must be released by the caller
- * after inserting into the rb tree.
+ * Returns <0 on error.
+ * Returns >0 (the added sequence number) on success.
  */
 static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
 				 struct tree_mod_elem **tm_ret)
 {
 	struct tree_mod_elem *tm;
-	int seq;
 
-	if (tree_mod_dont_log(fs_info, NULL))
-		return 0;
-
-	tm = *tm_ret = kzalloc(sizeof(*tm), flags);
+	/*
+	 * once we switch from spin locks to something different, we should
+	 * honor the flags parameter here.
+	 */
+	tm = *tm_ret = kzalloc(sizeof(*tm), GFP_ATOMIC);
 	if (!tm)
 		return -ENOMEM;
 
-	tm->elem.flags = 0;
-	spin_lock(&fs_info->tree_mod_seq_lock);
-	if (list_empty(&fs_info->tree_mod_seq_list)) {
-		/*
-		 * someone emptied the list while we were waiting for the lock.
-		 * we must not add to the list, because no blocker exists. items
-		 * are removed from the list only when the existing blocker is
-		 * removed from the list.
-		 */
-		kfree(tm);
-		seq = 0;
-		spin_unlock(&fs_info->tree_mod_seq_lock);
-	} else {
-		__get_tree_mod_seq(fs_info, &tm->elem);
-		seq = tm->elem.seq;
-	}
-
-	return seq;
+	tm->seq = btrfs_inc_tree_mod_seq(fs_info);
+	return tm->seq;
 }
 
-static noinline int
-tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
-			     struct extent_buffer *eb, int slot,
-			     enum mod_log_op op, gfp_t flags)
+static inline int
+__tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
+			  struct extent_buffer *eb, int slot,
+			  enum mod_log_op op, gfp_t flags)
 {
-	struct tree_mod_elem *tm;
 	int ret;
+	struct tree_mod_elem *tm;
 
 	ret = tree_mod_alloc(fs_info, flags, &tm);
-	if (ret <= 0)
+	if (ret < 0)
 		return ret;
 
 	tm->index = eb->start >> PAGE_CACHE_SHIFT;
@@ -530,8 +556,22 @@ tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
 	tm->slot = slot;
 	tm->generation = btrfs_node_ptr_generation(eb, slot);
 
-	ret = __tree_mod_log_insert(fs_info, tm);
-	spin_unlock(&fs_info->tree_mod_seq_lock);
+	return __tree_mod_log_insert(fs_info, tm);
+}
+
+static noinline int
+tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
+			     struct extent_buffer *eb, int slot,
+			     enum mod_log_op op, gfp_t flags)
+{
+	int ret;
+
+	if (tree_mod_dont_log(fs_info, eb))
+		return 0;
+
+	ret = __tree_mod_log_insert_key(fs_info, eb, slot, op, flags);
+
+	tree_mod_log_write_unlock(fs_info);
 	return ret;
 }
 
@@ -543,6 +583,14 @@ tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
 }
 
 static noinline int
+tree_mod_log_insert_key_locked(struct btrfs_fs_info *fs_info,
+			       struct extent_buffer *eb, int slot,
+			       enum mod_log_op op)
+{
+	return __tree_mod_log_insert_key(fs_info, eb, slot, op, GFP_NOFS);
+}
+
+static noinline int
 tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
 			 struct extent_buffer *eb, int dst_slot, int src_slot,
 			 int nr_items, gfp_t flags)
@@ -555,14 +603,14 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
 		return 0;
 
 	for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
-		ret = tree_mod_log_insert_key(fs_info, eb, i + dst_slot,
+		ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot,
 				MOD_LOG_KEY_REMOVE_WHILE_MOVING);
 		BUG_ON(ret < 0);
 	}
 
 	ret = tree_mod_alloc(fs_info, flags, &tm);
-	if (ret <= 0)
-		return ret;
+	if (ret < 0)
+		goto out;
 
 	tm->index = eb->start >> PAGE_CACHE_SHIFT;
 	tm->slot = src_slot;
@@ -571,10 +619,26 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
 	tm->op = MOD_LOG_MOVE_KEYS;
 
 	ret = __tree_mod_log_insert(fs_info, tm);
-	spin_unlock(&fs_info->tree_mod_seq_lock);
+out:
+	tree_mod_log_write_unlock(fs_info);
 	return ret;
 }
 
+static inline void
+__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
+{
+	int i;
+	u32 nritems;
+	int ret;
+
+	nritems = btrfs_header_nritems(eb);
+	for (i = nritems - 1; i >= 0; i--) {
+		ret = tree_mod_log_insert_key_locked(fs_info, eb, i,
+				MOD_LOG_KEY_REMOVE_WHILE_FREEING);
+		BUG_ON(ret < 0);
+	}
+}
+
 static noinline int
 tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
 			 struct extent_buffer *old_root,
@@ -583,9 +647,14 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
 	struct tree_mod_elem *tm;
 	int ret;
 
+	if (tree_mod_dont_log(fs_info, NULL))
+		return 0;
+
+	__tree_mod_log_free_eb(fs_info, old_root);
+
 	ret = tree_mod_alloc(fs_info, flags, &tm);
-	if (ret <= 0)
-		return ret;
+	if (ret < 0)
+		goto out;
 
 	tm->index = new_root->start >> PAGE_CACHE_SHIFT;
 	tm->old_root.logical = old_root->start;
@@ -594,7 +663,8 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
 	tm->op = MOD_LOG_ROOT_REPLACE;
 
 	ret = __tree_mod_log_insert(fs_info, tm);
-	spin_unlock(&fs_info->tree_mod_seq_lock);
+out:
+	tree_mod_log_write_unlock(fs_info);
 	return ret;
 }
 
@@ -608,7 +678,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
 	struct tree_mod_elem *found = NULL;
 	u64 index = start >> PAGE_CACHE_SHIFT;
 
-	read_lock(&fs_info->tree_mod_log_lock);
+	tree_mod_log_read_lock(fs_info);
 	tm_root = &fs_info->tree_mod_log;
 	node = tm_root->rb_node;
 	while (node) {
@@ -617,18 +687,18 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
 			node = node->rb_left;
 		} else if (cur->index > index) {
 			node = node->rb_right;
-		} else if (cur->elem.seq < min_seq) {
+		} else if (cur->seq < min_seq) {
 			node = node->rb_left;
 		} else if (!smallest) {
 			/* we want the node with the highest seq */
 			if (found)
-				BUG_ON(found->elem.seq > cur->elem.seq);
+				BUG_ON(found->seq > cur->seq);
 			found = cur;
 			node = node->rb_left;
-		} else if (cur->elem.seq > min_seq) {
+		} else if (cur->seq > min_seq) {
 			/* we want the node with the smallest seq */
 			if (found)
-				BUG_ON(found->elem.seq < cur->elem.seq);
+				BUG_ON(found->seq < cur->seq);
 			found = cur;
 			node = node->rb_right;
 		} else {
@@ -636,7 +706,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
 			break;
 		}
 	}
-	read_unlock(&fs_info->tree_mod_log_lock);
+	tree_mod_log_read_unlock(fs_info);
 
 	return found;
 }
@@ -664,7 +734,7 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
 	return __tree_mod_log_search(fs_info, start, min_seq, 0);
 }
 
-static inline void
+static noinline void
 tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
 		     struct extent_buffer *src, unsigned long dst_offset,
 		     unsigned long src_offset, int nr_items)
@@ -675,18 +745,23 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
 	if (tree_mod_dont_log(fs_info, NULL))
 		return;
 
-	if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
+	if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) {
+		tree_mod_log_write_unlock(fs_info);
 		return;
+	}
 
-	/* speed this up by single seq for all operations? */
 	for (i = 0; i < nr_items; i++) {
-		ret = tree_mod_log_insert_key(fs_info, src, i + src_offset,
-					      MOD_LOG_KEY_REMOVE);
+		ret = tree_mod_log_insert_key_locked(fs_info, src,
+						     i + src_offset,
+						     MOD_LOG_KEY_REMOVE);
 		BUG_ON(ret < 0);
-		ret = tree_mod_log_insert_key(fs_info, dst, i + dst_offset,
-					      MOD_LOG_KEY_ADD);
+		ret = tree_mod_log_insert_key_locked(fs_info, dst,
+						     i + dst_offset,
+						     MOD_LOG_KEY_ADD);
 		BUG_ON(ret < 0);
 	}
+
+	tree_mod_log_write_unlock(fs_info);
 }
 
 static inline void
@@ -699,7 +774,7 @@ tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
 	BUG_ON(ret < 0);
 }
 
-static inline void
+static noinline void
 tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
 			  struct extent_buffer *eb,
 			  struct btrfs_disk_key *disk_key, int slot, int atomic)
@@ -712,30 +787,22 @@ tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
 	BUG_ON(ret < 0);
 }
 
-static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
-				 struct extent_buffer *eb)
+static noinline void
+tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
 {
-	int i;
-	int ret;
-	u32 nritems;
-
 	if (tree_mod_dont_log(fs_info, eb))
 		return;
 
-	nritems = btrfs_header_nritems(eb);
-	for (i = nritems - 1; i >= 0; i--) {
-		ret = tree_mod_log_insert_key(fs_info, eb, i,
-				MOD_LOG_KEY_REMOVE_WHILE_FREEING);
-		BUG_ON(ret < 0);
-	}
+	__tree_mod_log_free_eb(fs_info, eb);
+
+	tree_mod_log_write_unlock(fs_info);
 }
 
-static inline void
+static noinline void
 tree_mod_log_set_root_pointer(struct btrfs_root *root,
 			      struct extent_buffer *new_root_node)
 {
 	int ret;
-	tree_mod_log_free_eb(root->fs_info, root->node);
 	ret = tree_mod_log_insert_root(root->fs_info, root->node,
 				       new_root_node, GFP_NOFS);
 	BUG_ON(ret < 0);
@@ -1069,7 +1136,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
 	unsigned long p_size = sizeof(struct btrfs_key_ptr);
 
 	n = btrfs_header_nritems(eb);
-	while (tm && tm->elem.seq >= time_seq) {
+	while (tm && tm->seq >= time_seq) {
 		/*
 		 * all the operations are recorded with the operator used for
 		 * the modification. as we're going backwards, we do the
@@ -2722,6 +2789,78 @@ done:
 }
 
 /*
+ * helper to use instead of search slot if no exact match is needed but
+ * instead the next or previous item should be returned.
+ * When find_higher is true, the next higher item is returned, the next lower
+ * otherwise.
+ * When return_any and find_higher are both true, and no higher item is found,
+ * return the next lower instead.
+ * When return_any is true and find_higher is false, and no lower item is
+ * found, return the next higher instead.
+ * It returns 0 if any item is found, 1 if none is found (tree empty), and
+ * < 0 on error
+ */
+int btrfs_search_slot_for_read(struct btrfs_root *root,
+			       struct btrfs_key *key, struct btrfs_path *p,
+			       int find_higher, int return_any)
+{
+	int ret;
+	struct extent_buffer *leaf;
+
+again:
+	ret = btrfs_search_slot(NULL, root, key, p, 0, 0);
+	if (ret <= 0)
+		return ret;
+	/*
+	 * a return value of 1 means the path is at the position where the
+	 * item should be inserted. Normally this is the next bigger item,
+	 * but in case the previous item is the last in a leaf, path points
+	 * to the first free slot in the previous leaf, i.e. at an invalid
+	 * item.
+	 */
+	leaf = p->nodes[0];
+
+	if (find_higher) {
+		if (p->slots[0] >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(root, p);
+			if (ret <= 0)
+				return ret;
+			if (!return_any)
+				return 1;
+			/*
+			 * no higher item found, return the next
+			 * lower instead
+			 */
+			return_any = 0;
+			find_higher = 0;
+			btrfs_release_path(p);
+			goto again;
+		}
+	} else {
+		if (p->slots[0] >= btrfs_header_nritems(leaf)) {
+			/* we're sitting on an invalid slot */
+			if (p->slots[0] == 0) {
+				ret = btrfs_prev_leaf(root, p);
+				if (ret <= 0)
+					return ret;
+				if (!return_any)
+					return 1;
+				/*
+				 * no lower item found, return the next
+				 * higher instead
+				 */
+				return_any = 0;
+				find_higher = 1;
+				btrfs_release_path(p);
+				goto again;
+			}
+			--p->slots[0];
+		}
+	}
+	return 0;
+}
+
+/*
  * adjust the pointers going up the tree, starting at level
  * making sure the right key of each node is points to 'key'.
  * This is used after shifting pointers to the left, so it stops
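
Note: the comment above btrfs_search_slot_for_read() defines its return convention (0 item found, 1 tree empty, <0 error). A hedged usage sketch that visits the first item at or after a given key (the zeroed key is an arbitrary illustrative choice):

	struct btrfs_key key = {0};	/* search from the very first item */
	struct btrfs_path *path;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	/* find_higher=1: want the next higher item; return_any=0: no fallback */
	ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
	if (ret == 0)
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
	btrfs_free_path(path);
	return ret < 0 ? ret : 0;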
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a0ee2f8e056..00f9a50f986 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -91,6 +91,9 @@ struct btrfs_ordered_sum;
 /* for storing balance parameters in the root tree */
 #define BTRFS_BALANCE_OBJECTID -4ULL
 
+/* holds quota configuration and tracking */
+#define BTRFS_QUOTA_TREE_OBJECTID 8ULL
+
 /* orhpan objectid for tracking unlinked/truncated files */
 #define BTRFS_ORPHAN_OBJECTID -5ULL
 
@@ -883,6 +886,72 @@ struct btrfs_block_group_item {
 	__le64 flags;
 } __attribute__ ((__packed__));
 
+/*
+ * is subvolume quota turned on?
+ */
+#define BTRFS_QGROUP_STATUS_FLAG_ON (1ULL << 0)
+/*
+ * SCANNING is set during the initialization phase
+ */
+#define BTRFS_QGROUP_STATUS_FLAG_SCANNING (1ULL << 1)
+/*
+ * Some qgroup entries are known to be out of date,
+ * either because the configuration has changed in a way that
+ * makes a rescan necessary, or because the fs has been mounted
+ * with a non-qgroup-aware version.
+ * Turning qouta off and on again makes it inconsistent, too.
+ */
+#define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT (1ULL << 2)
+
+#define BTRFS_QGROUP_STATUS_VERSION 1
+
+struct btrfs_qgroup_status_item {
+	__le64 version;
+	/*
+	 * the generation is updated during every commit. As older
+	 * versions of btrfs are not aware of qgroups, it will be
+	 * possible to detect inconsistencies by checking the
+	 * generation on mount time
+	 */
+	__le64 generation;
+
+	/* flag definitions see above */
+	__le64 flags;
+
+	/*
+	 * only used during scanning to record the progress
+	 * of the scan. It contains a logical address
+	 */
+	__le64 scan;
+} __attribute__ ((__packed__));
+
+struct btrfs_qgroup_info_item {
+	__le64 generation;
+	__le64 rfer;
+	__le64 rfer_cmpr;
+	__le64 excl;
+	__le64 excl_cmpr;
+} __attribute__ ((__packed__));
+
+/* flags definition for qgroup limits */
+#define BTRFS_QGROUP_LIMIT_MAX_RFER	(1ULL << 0)
+#define BTRFS_QGROUP_LIMIT_MAX_EXCL	(1ULL << 1)
+#define BTRFS_QGROUP_LIMIT_RSV_RFER	(1ULL << 2)
+#define BTRFS_QGROUP_LIMIT_RSV_EXCL	(1ULL << 3)
+#define BTRFS_QGROUP_LIMIT_RFER_CMPR	(1ULL << 4)
+#define BTRFS_QGROUP_LIMIT_EXCL_CMPR	(1ULL << 5)
+
+struct btrfs_qgroup_limit_item {
+	/*
+	 * only updated when any of the other values change
+	 */
+	__le64 flags;
+	__le64 max_rfer;
+	__le64 max_excl;
+	__le64 rsv_rfer;
+	__le64 rsv_excl;
+} __attribute__ ((__packed__));
+
 struct btrfs_space_info {
 	u64 flags;
 
@@ -1030,6 +1099,13 @@ struct btrfs_block_group_cache {
 	struct list_head cluster_list;
 };
 
+/* delayed seq elem */
+struct seq_list {
+	struct list_head list;
+	u64 seq;
+};
+
+/* fs_info */
 struct reloc_control;
 struct btrfs_device;
 struct btrfs_fs_devices;
@@ -1044,6 +1120,7 @@ struct btrfs_fs_info {
 	struct btrfs_root *dev_root;
 	struct btrfs_root *fs_root;
 	struct btrfs_root *csum_root;
+	struct btrfs_root *quota_root;
 
 	/* the log root tree is a directory of all the other log roots */
 	struct btrfs_root *log_root_tree;
@@ -1144,6 +1221,8 @@ struct btrfs_fs_info {
 	spinlock_t tree_mod_seq_lock;
 	atomic_t tree_mod_seq;
 	struct list_head tree_mod_seq_list;
+	struct seq_list tree_mod_seq_elem;
+	wait_queue_head_t tree_mod_seq_wait;
 
 	/* this protects tree_mod_log */
 	rwlock_t tree_mod_log_lock;
@@ -1298,6 +1377,29 @@ struct btrfs_fs_info {
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
 	u32 check_integrity_print_mask;
 #endif
+	/*
+	 * quota information
+	 */
+	unsigned int quota_enabled:1;
+
+	/*
+	 * quota_enabled only changes state after a commit. This holds the
+	 * next state.
+	 */
+	unsigned int pending_quota_state:1;
+
+	/* is qgroup tracking in a consistent state? */
+	u64 qgroup_flags;
+
+	/* holds configuration and tracking. Protected by qgroup_lock */
+	struct rb_root qgroup_tree;
+	spinlock_t qgroup_lock;
+
+	/* list of dirty qgroups to be written at next commit */
+	struct list_head dirty_qgroups;
+
+	/* used by btrfs_qgroup_record_ref for an efficient tree traversal */
+	u64 qgroup_seq;
 
 	/* filesystem state */
 	u64 fs_state;
@@ -1527,6 +1629,30 @@ struct btrfs_ioctl_defrag_range_args {
 #define BTRFS_DEV_ITEM_KEY	216
 #define BTRFS_CHUNK_ITEM_KEY	228
 
+/*
+ * Records the overall state of the qgroups.
+ * There's only one instance of this key present,
+ * (0, BTRFS_QGROUP_STATUS_KEY, 0)
+ */
+#define BTRFS_QGROUP_STATUS_KEY         240
+/*
+ * Records the currently used space of the qgroup.
+ * One key per qgroup, (0, BTRFS_QGROUP_INFO_KEY, qgroupid).
+ */
+#define BTRFS_QGROUP_INFO_KEY           242
+/*
+ * Contains the user configured limits for the qgroup.
+ * One key per qgroup, (0, BTRFS_QGROUP_LIMIT_KEY, qgroupid).
+ */
+#define BTRFS_QGROUP_LIMIT_KEY          244
+/*
+ * Records the child-parent relationship of qgroups. For
+ * each relation, 2 keys are present:
+ * (childid, BTRFS_QGROUP_RELATION_KEY, parentid)
+ * (parentid, BTRFS_QGROUP_RELATION_KEY, childid)
+ */
+#define BTRFS_QGROUP_RELATION_KEY       246
+
 #define BTRFS_BALANCE_ITEM_KEY	248
 
 /*
@@ -2508,6 +2634,49 @@ static inline void btrfs_set_dev_stats_value(struct extent_buffer *eb,
 			  sizeof(val));
 }
 
+/* btrfs_qgroup_status_item */
+BTRFS_SETGET_FUNCS(qgroup_status_generation, struct btrfs_qgroup_status_item,
+		   generation, 64);
+BTRFS_SETGET_FUNCS(qgroup_status_version, struct btrfs_qgroup_status_item,
+		   version, 64);
+BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item,
+		   flags, 64);
+BTRFS_SETGET_FUNCS(qgroup_status_scan, struct btrfs_qgroup_status_item,
+		   scan, 64);
+
+/* btrfs_qgroup_info_item */
+BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item,
+		   generation, 64);
+BTRFS_SETGET_FUNCS(qgroup_info_rfer, struct btrfs_qgroup_info_item, rfer, 64);
+BTRFS_SETGET_FUNCS(qgroup_info_rfer_cmpr, struct btrfs_qgroup_info_item,
+		   rfer_cmpr, 64);
+BTRFS_SETGET_FUNCS(qgroup_info_excl, struct btrfs_qgroup_info_item, excl, 64);
+BTRFS_SETGET_FUNCS(qgroup_info_excl_cmpr, struct btrfs_qgroup_info_item,
+		   excl_cmpr, 64);
+
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_generation,
+			 struct btrfs_qgroup_info_item, generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_rfer, struct btrfs_qgroup_info_item,
+			 rfer, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_rfer_cmpr,
+			 struct btrfs_qgroup_info_item, rfer_cmpr, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_excl, struct btrfs_qgroup_info_item,
+			 excl, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_excl_cmpr,
+			 struct btrfs_qgroup_info_item, excl_cmpr, 64);
+
+/* btrfs_qgroup_limit_item */
+BTRFS_SETGET_FUNCS(qgroup_limit_flags, struct btrfs_qgroup_limit_item,
+		   flags, 64);
+BTRFS_SETGET_FUNCS(qgroup_limit_max_rfer, struct btrfs_qgroup_limit_item,
+		   max_rfer, 64);
+BTRFS_SETGET_FUNCS(qgroup_limit_max_excl, struct btrfs_qgroup_limit_item,
+		   max_excl, 64);
+BTRFS_SETGET_FUNCS(qgroup_limit_rsv_rfer, struct btrfs_qgroup_limit_item,
+		   rsv_rfer, 64);
+BTRFS_SETGET_FUNCS(qgroup_limit_rsv_excl, struct btrfs_qgroup_limit_item,
+		   rsv_excl, 64);
+
 static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
 {
 	return sb->s_fs_info;
@@ -2703,6 +2872,8 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
 int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range);
 
 int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
+int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
+					 struct btrfs_fs_info *fs_info);
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
 		     int level, int *slot);
@@ -2753,6 +2924,9 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 		      ins_len, int cow);
 int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
 			  struct btrfs_path *p, u64 time_seq);
+int btrfs_search_slot_for_read(struct btrfs_root *root,
+			       struct btrfs_key *key, struct btrfs_path *p,
+			       int find_higher, int return_any);
 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 		       struct btrfs_root *root, struct extent_buffer *parent,
 		       int start_slot, int cache_only, u64 *last_ret,
@@ -2835,11 +3009,22 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
 	kfree(fs_info->chunk_root);
 	kfree(fs_info->dev_root);
 	kfree(fs_info->csum_root);
+	kfree(fs_info->quota_root);
 	kfree(fs_info->super_copy);
 	kfree(fs_info->super_for_commit);
 	kfree(fs_info);
 }
 
+/* tree mod log functions from ctree.c */
+u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
+			   struct seq_list *elem);
+void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
+			    struct seq_list *elem);
+static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
+{
+	return atomic_inc_return(&fs_info->tree_mod_seq);
+}
+
 /* root-item.c */
 int btrfs_find_root_ref(struct btrfs_root *tree_root,
 			struct btrfs_path *path,
@@ -3198,17 +3383,49 @@ void btrfs_reada_detach(void *handle);
 int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
 			 u64 start, int err);
 
-/* delayed seq elem */
-struct seq_list {
+/* qgroup.c */
+struct qgroup_update {
 	struct list_head list;
-	u64 seq;
-	u32 flags;
+	struct btrfs_delayed_ref_node *node;
+	struct btrfs_delayed_extent_op *extent_op;
 };
 
-void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
-			    struct seq_list *elem);
-void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
-			    struct seq_list *elem);
+int btrfs_quota_enable(struct btrfs_trans_handle *trans,
+		       struct btrfs_fs_info *fs_info);
+int btrfs_quota_disable(struct btrfs_trans_handle *trans,
+			struct btrfs_fs_info *fs_info);
+int btrfs_quota_rescan(struct btrfs_fs_info *fs_info);
+int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
+			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
+int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
+			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
+int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
+			struct btrfs_fs_info *fs_info, u64 qgroupid,
+			char *name);
+int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
+			struct btrfs_fs_info *fs_info, u64 qgroupid);
+int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
+		       struct btrfs_fs_info *fs_info, u64 qgroupid,
+		       struct btrfs_qgroup_limit *limit);
+int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
+void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
+struct btrfs_delayed_extent_op;
+int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
+			    struct btrfs_delayed_ref_node *node,
+			    struct btrfs_delayed_extent_op *extent_op);
+int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
+			     struct btrfs_fs_info *fs_info,
+			     struct btrfs_delayed_ref_node *node,
+			     struct btrfs_delayed_extent_op *extent_op);
+int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
+		      struct btrfs_fs_info *fs_info);
+int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
+			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
+			 struct btrfs_qgroup_inherit *inherit);
+int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes);
+void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes);
+
+void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
 
 static inline int is_fstree(u64 rootid)
 {
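
Note: the key layout comments added above fix how qgroup items are addressed in the quota tree. As a worked example (the qgroup ids 5 and 100 are made up), the info item of qgroup 5 would be looked up with:

	struct btrfs_key key;

	key.objectid = 0;			/* always 0 for status/info/limit */
	key.type = BTRFS_QGROUP_INFO_KEY;
	key.offset = 5;				/* the qgroupid */

	/*
	 * a relation between qgroups 5 and 100 is stored twice, so it can be
	 * found from either end:
	 *   (5,   BTRFS_QGROUP_RELATION_KEY, 100)
	 *   (100, BTRFS_QGROUP_RELATION_KEY, 5)
	 */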
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 13ae7b04790..da7419ed01b 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -233,22 +233,26 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
-int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
+int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
+			    struct btrfs_delayed_ref_root *delayed_refs,
 			    u64 seq)
 {
 	struct seq_list *elem;
-
-	assert_spin_locked(&delayed_refs->lock);
-	if (list_empty(&delayed_refs->seq_head))
-		return 0;
-
-	elem = list_first_entry(&delayed_refs->seq_head, struct seq_list, list);
-	if (seq >= elem->seq) {
-		pr_debug("holding back delayed_ref %llu, lowest is %llu (%p)\n",
-			 seq, elem->seq, delayed_refs);
-		return 1;
+	int ret = 0;
+
+	spin_lock(&fs_info->tree_mod_seq_lock);
+	if (!list_empty(&fs_info->tree_mod_seq_list)) {
+		elem = list_first_entry(&fs_info->tree_mod_seq_list,
+					struct seq_list, list);
+		if (seq >= elem->seq) {
+			pr_debug("holding back delayed_ref %llu, lowest is "
+				 "%llu (%p)\n", seq, elem->seq, delayed_refs);
+			ret = 1;
+		}
 	}
-	return 0;
+
+	spin_unlock(&fs_info->tree_mod_seq_lock);
+	return ret;
 }
 
 int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
@@ -525,8 +529,8 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 	ref->is_head = 0;
 	ref->in_tree = 1;
 
-	if (is_fstree(ref_root))
-		seq = inc_delayed_seq(delayed_refs);
+	if (need_ref_seq(for_cow, ref_root))
+		seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
 	ref->seq = seq;
 
 	full_ref = btrfs_delayed_node_to_tree_ref(ref);
@@ -584,8 +588,8 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 	ref->is_head = 0;
 	ref->in_tree = 1;
 
-	if (is_fstree(ref_root))
-		seq = inc_delayed_seq(delayed_refs);
+	if (need_ref_seq(for_cow, ref_root))
+		seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
 	ref->seq = seq;
 
 	full_ref = btrfs_delayed_node_to_data_ref(ref);
@@ -658,10 +662,12 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 	add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
 			     num_bytes, parent, ref_root, level, action,
 			     for_cow);
-	if (!is_fstree(ref_root) &&
-	    waitqueue_active(&delayed_refs->seq_wait))
-		wake_up(&delayed_refs->seq_wait);
+	if (!need_ref_seq(for_cow, ref_root) &&
+	    waitqueue_active(&fs_info->tree_mod_seq_wait))
+		wake_up(&fs_info->tree_mod_seq_wait);
 	spin_unlock(&delayed_refs->lock);
+	if (need_ref_seq(for_cow, ref_root))
+		btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
 
 	return 0;
 }
@@ -707,10 +713,12 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 	add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
 			     num_bytes, parent, ref_root, owner, offset,
 			     action, for_cow);
-	if (!is_fstree(ref_root) &&
-	    waitqueue_active(&delayed_refs->seq_wait))
-		wake_up(&delayed_refs->seq_wait);
+	if (!need_ref_seq(for_cow, ref_root) &&
+	    waitqueue_active(&fs_info->tree_mod_seq_wait))
+		wake_up(&fs_info->tree_mod_seq_wait);
 	spin_unlock(&delayed_refs->lock);
+	if (need_ref_seq(for_cow, ref_root))
+		btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
 
 	return 0;
 }
@@ -736,8 +744,8 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
 				   num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
 				   extent_op->is_data);
 
-	if (waitqueue_active(&delayed_refs->seq_wait))
-		wake_up(&delayed_refs->seq_wait);
+	if (waitqueue_active(&fs_info->tree_mod_seq_wait))
+		wake_up(&fs_info->tree_mod_seq_wait);
 	spin_unlock(&delayed_refs->lock);
 	return 0;
 }
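
Note: the wake-ups above pair with btrfs_check_delayed_seq(): a delayed ref whose seq is still protected by a lower blocker must be held back, and tree_mod_seq_wait is signalled when the lowest blocker goes away (see btrfs_put_tree_mod_seq() in the ctree.c hunk). A hedged sketch of a consumer, not the actual run_delayed_refs code:

	/* delayed_refs->lock is no longer required for the seq check */
	if (btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
		/* a backref walk with a lower seq is still in flight */
		wait_event(fs_info->tree_mod_seq_wait,
			   !btrfs_check_delayed_seq(fs_info, delayed_refs,
						    ref->seq));
	}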
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 413927fb995..0d7c90c366b 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -139,26 +139,6 @@ struct btrfs_delayed_ref_root {
 	int flushing;
 
 	u64 run_delayed_start;
-
-	/*
-	 * seq number of delayed refs. We need to know if a backref was being
-	 * added before the currently processed ref or afterwards.
-	 */
-	u64 seq;
-
-	/*
-	 * seq_list holds a list of all seq numbers that are currently being
-	 * added to the list. While walking backrefs (btrfs_find_all_roots,
-	 * qgroups), which might take some time, no newer ref must be processed,
-	 * as it might influence the outcome of the walk.
-	 */
-	struct list_head seq_head;
-
-	/*
-	 * when the only refs we have in the list must not be processed, we want
-	 * to wait for more refs to show up or for the end of backref walking.
-	 */
-	wait_queue_head_t seq_wait;
 };
 
 static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
@@ -195,34 +175,28 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
 int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
 			   struct list_head *cluster, u64 search_start);
 
-static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs)
-{
-	assert_spin_locked(&delayed_refs->lock);
-	++delayed_refs->seq;
-	return delayed_refs->seq;
-}
+int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
+			    struct btrfs_delayed_ref_root *delayed_refs,
+			    u64 seq);
 
-static inline void
-btrfs_get_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
-		      struct seq_list *elem)
+/*
+ * delayed refs with a ref_seq > 0 must be held back during backref walking.
+ * this only applies to items in one of the fs-trees. for_cow items never need
+ * to be held back, so they won't get a ref_seq number.
+ */
+static inline int need_ref_seq(int for_cow, u64 rootid)
 {
-	assert_spin_locked(&delayed_refs->lock);
-	elem->seq = delayed_refs->seq;
-	list_add_tail(&elem->list, &delayed_refs->seq_head);
-}
+	if (for_cow)
+		return 0;
 
-static inline void
-btrfs_put_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
-		      struct seq_list *elem)
-{
-	spin_lock(&delayed_refs->lock);
-	list_del(&elem->list);
-	wake_up(&delayed_refs->seq_wait);
-	spin_unlock(&delayed_refs->lock);
-}
+	if (rootid == BTRFS_FS_TREE_OBJECTID)
+		return 1;
 
-int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
-			    u64 seq);
+	if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID)
+		return 1;
+
+	return 0;
+}
 
 /*
  * a node might live in a head or a regular ref, this lets you
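
Note: since need_ref_seq() is now the single gate for sequence numbers and qgroup recording, a few worked cases may help (objectid values per the standard btrfs definitions: FS_TREE is 5, FIRST_FREE is 256, EXTENT_TREE is 2):

	need_ref_seq(1, BTRFS_FS_TREE_OBJECTID);	/* 0: for_cow refs */
	need_ref_seq(0, BTRFS_FS_TREE_OBJECTID);	/* 1: default subvol */
	need_ref_seq(0, 257);				/* 1: any subvolume */
	need_ref_seq(0, BTRFS_EXTENT_TREE_OBJECTID);	/* 0: not an fs-tree */
	need_ref_seq(0, BTRFS_ORPHAN_OBJECTID);		/* 0: (s64)-5 < 256 */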
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1a4a2a97592..05f4fb6e060 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1225,6 +1225,82 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info)
 	return root;
 }
 
+struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
+				     struct btrfs_fs_info *fs_info,
+				     u64 objectid)
+{
+	struct extent_buffer *leaf;
+	struct btrfs_root *tree_root = fs_info->tree_root;
+	struct btrfs_root *root;
+	struct btrfs_key key;
+	int ret = 0;
+	u64 bytenr;
+
+	root = btrfs_alloc_root(fs_info);
+	if (!root)
+		return ERR_PTR(-ENOMEM);
+
+	__setup_root(tree_root->nodesize, tree_root->leafsize,
+		     tree_root->sectorsize, tree_root->stripesize,
+		     root, fs_info, objectid);
+	root->root_key.objectid = objectid;
+	root->root_key.type = BTRFS_ROOT_ITEM_KEY;
+	root->root_key.offset = 0;
+
+	leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
+				      0, objectid, NULL, 0, 0, 0);
+	if (IS_ERR(leaf)) {
+		ret = PTR_ERR(leaf);
+		goto fail;
+	}
+
+	bytenr = leaf->start;
+	memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
+	btrfs_set_header_bytenr(leaf, leaf->start);
+	btrfs_set_header_generation(leaf, trans->transid);
+	btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
+	btrfs_set_header_owner(leaf, objectid);
+	root->node = leaf;
+
+	write_extent_buffer(leaf, fs_info->fsid,
+			    (unsigned long)btrfs_header_fsid(leaf),
+			    BTRFS_FSID_SIZE);
+	write_extent_buffer(leaf, fs_info->chunk_tree_uuid,
+			    (unsigned long)btrfs_header_chunk_tree_uuid(leaf),
+			    BTRFS_UUID_SIZE);
+	btrfs_mark_buffer_dirty(leaf);
+
+	root->commit_root = btrfs_root_node(root);
+	root->track_dirty = 1;
+
+
+	root->root_item.flags = 0;
+	root->root_item.byte_limit = 0;
+	btrfs_set_root_bytenr(&root->root_item, leaf->start);
+	btrfs_set_root_generation(&root->root_item, trans->transid);
+	btrfs_set_root_level(&root->root_item, 0);
+	btrfs_set_root_refs(&root->root_item, 1);
+	btrfs_set_root_used(&root->root_item, leaf->len);
+	btrfs_set_root_last_snapshot(&root->root_item, 0);
+	btrfs_set_root_dirid(&root->root_item, 0);
+	root->root_item.drop_level = 0;
+
+	key.objectid = objectid;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	key.offset = 0;
+	ret = btrfs_insert_root(trans, tree_root, &key, &root->root_item);
+	if (ret)
+		goto fail;
+
+	btrfs_tree_unlock(leaf);
+
+fail:
+	if (ret)
+		return ERR_PTR(ret);
+
+	return root;
+}
+
 static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
 					 struct btrfs_fs_info *fs_info)
 {
@@ -1396,6 +1472,9 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
1396 return fs_info->dev_root; 1472 return fs_info->dev_root;
1397 if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) 1473 if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
1398 return fs_info->csum_root; 1474 return fs_info->csum_root;
1475 if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID)
1476 return fs_info->quota_root ? fs_info->quota_root :
1477 ERR_PTR(-ENOENT);
1399again: 1478again:
1400 spin_lock(&fs_info->fs_roots_radix_lock); 1479 spin_lock(&fs_info->fs_roots_radix_lock);
1401 root = radix_tree_lookup(&fs_info->fs_roots_radix, 1480 root = radix_tree_lookup(&fs_info->fs_roots_radix,
@@ -1823,6 +1902,10 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
1823 free_extent_buffer(info->extent_root->commit_root); 1902 free_extent_buffer(info->extent_root->commit_root);
1824 free_extent_buffer(info->csum_root->node); 1903 free_extent_buffer(info->csum_root->node);
1825 free_extent_buffer(info->csum_root->commit_root); 1904 free_extent_buffer(info->csum_root->commit_root);
1905 if (info->quota_root) {
1906 free_extent_buffer(info->quota_root->node);
1907 free_extent_buffer(info->quota_root->commit_root);
1908 }
1826 1909
1827 info->tree_root->node = NULL; 1910 info->tree_root->node = NULL;
1828 info->tree_root->commit_root = NULL; 1911 info->tree_root->commit_root = NULL;
@@ -1832,6 +1915,10 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
1832 info->extent_root->commit_root = NULL; 1915 info->extent_root->commit_root = NULL;
1833 info->csum_root->node = NULL; 1916 info->csum_root->node = NULL;
1834 info->csum_root->commit_root = NULL; 1917 info->csum_root->commit_root = NULL;
1918 if (info->quota_root) {
1919 info->quota_root->node = NULL;
1920 info->quota_root->commit_root = NULL;
1921 }
1835 1922
1836 if (chunk_root) { 1923 if (chunk_root) {
1837 free_extent_buffer(info->chunk_root->node); 1924 free_extent_buffer(info->chunk_root->node);
@@ -1862,6 +1949,7 @@ int open_ctree(struct super_block *sb,
1862 struct btrfs_root *csum_root; 1949 struct btrfs_root *csum_root;
1863 struct btrfs_root *chunk_root; 1950 struct btrfs_root *chunk_root;
1864 struct btrfs_root *dev_root; 1951 struct btrfs_root *dev_root;
1952 struct btrfs_root *quota_root;
1865 struct btrfs_root *log_tree_root; 1953 struct btrfs_root *log_tree_root;
1866 int ret; 1954 int ret;
1867 int err = -EINVAL; 1955 int err = -EINVAL;
@@ -1873,9 +1961,10 @@ int open_ctree(struct super_block *sb,
1873 csum_root = fs_info->csum_root = btrfs_alloc_root(fs_info); 1961 csum_root = fs_info->csum_root = btrfs_alloc_root(fs_info);
1874 chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); 1962 chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
1875 dev_root = fs_info->dev_root = btrfs_alloc_root(fs_info); 1963 dev_root = fs_info->dev_root = btrfs_alloc_root(fs_info);
1964 quota_root = fs_info->quota_root = btrfs_alloc_root(fs_info);
1876 1965
1877 if (!tree_root || !extent_root || !csum_root || 1966 if (!tree_root || !extent_root || !csum_root ||
1878 !chunk_root || !dev_root) { 1967 !chunk_root || !dev_root || !quota_root) {
1879 err = -ENOMEM; 1968 err = -ENOMEM;
1880 goto fail; 1969 goto fail;
1881 } 1970 }
@@ -1944,6 +2033,8 @@ int open_ctree(struct super_block *sb,
1944 fs_info->free_chunk_space = 0; 2033 fs_info->free_chunk_space = 0;
1945 fs_info->tree_mod_log = RB_ROOT; 2034 fs_info->tree_mod_log = RB_ROOT;
1946 2035
2036 init_waitqueue_head(&fs_info->tree_mod_seq_wait);
2037
1947 /* readahead state */ 2038 /* readahead state */
1948 INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); 2039 INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
1949 spin_lock_init(&fs_info->reada_lock); 2040 spin_lock_init(&fs_info->reada_lock);
@@ -2032,6 +2123,13 @@ int open_ctree(struct super_block *sb,
2032 init_rwsem(&fs_info->cleanup_work_sem); 2123 init_rwsem(&fs_info->cleanup_work_sem);
2033 init_rwsem(&fs_info->subvol_sem); 2124 init_rwsem(&fs_info->subvol_sem);
2034 2125
2126 spin_lock_init(&fs_info->qgroup_lock);
2127 fs_info->qgroup_tree = RB_ROOT;
2128 INIT_LIST_HEAD(&fs_info->dirty_qgroups);
2129 fs_info->qgroup_seq = 1;
2130 fs_info->quota_enabled = 0;
2131 fs_info->pending_quota_state = 0;
2132
2035 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); 2133 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
2036 btrfs_init_free_cluster(&fs_info->data_alloc_cluster); 2134 btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
2037 2135
@@ -2356,6 +2454,17 @@ retry_root_backup:
2356 goto recovery_tree_root; 2454 goto recovery_tree_root;
2357 csum_root->track_dirty = 1; 2455 csum_root->track_dirty = 1;
2358 2456
2457 ret = find_and_setup_root(tree_root, fs_info,
2458 BTRFS_QUOTA_TREE_OBJECTID, quota_root);
2459 if (ret) {
2460 kfree(quota_root);
2461 quota_root = fs_info->quota_root = NULL;
2462 } else {
2463 quota_root->track_dirty = 1;
2464 fs_info->quota_enabled = 1;
2465 fs_info->pending_quota_state = 1;
2466 }
2467
2359 fs_info->generation = generation; 2468 fs_info->generation = generation;
2360 fs_info->last_trans_committed = generation; 2469 fs_info->last_trans_committed = generation;
2361 2470
@@ -2415,6 +2524,9 @@ retry_root_backup:
2415 " integrity check module %s\n", sb->s_id); 2524 " integrity check module %s\n", sb->s_id);
2416 } 2525 }
2417#endif 2526#endif
2527 ret = btrfs_read_qgroup_config(fs_info);
2528 if (ret)
2529 goto fail_trans_kthread;
2418 2530
2419 /* do not make disk changes in broken FS */ 2531 /* do not make disk changes in broken FS */
2420 if (btrfs_super_log_root(disk_super) != 0 && 2532 if (btrfs_super_log_root(disk_super) != 0 &&
@@ -2425,7 +2537,7 @@ retry_root_backup:
2425 printk(KERN_WARNING "Btrfs log replay required " 2537 printk(KERN_WARNING "Btrfs log replay required "
2426 "on RO media\n"); 2538 "on RO media\n");
2427 err = -EIO; 2539 err = -EIO;
2428 goto fail_trans_kthread; 2540 goto fail_qgroup;
2429 } 2541 }
2430 blocksize = 2542 blocksize =
2431 btrfs_level_size(tree_root, 2543 btrfs_level_size(tree_root,
@@ -2434,7 +2546,7 @@ retry_root_backup:
2434 log_tree_root = btrfs_alloc_root(fs_info); 2546 log_tree_root = btrfs_alloc_root(fs_info);
2435 if (!log_tree_root) { 2547 if (!log_tree_root) {
2436 err = -ENOMEM; 2548 err = -ENOMEM;
2437 goto fail_trans_kthread; 2549 goto fail_qgroup;
2438 } 2550 }
2439 2551
2440 __setup_root(nodesize, leafsize, sectorsize, stripesize, 2552 __setup_root(nodesize, leafsize, sectorsize, stripesize,
@@ -2474,7 +2586,7 @@ retry_root_backup:
2474 printk(KERN_WARNING 2586 printk(KERN_WARNING
2475 "btrfs: failed to recover relocation\n"); 2587 "btrfs: failed to recover relocation\n");
2476 err = -EINVAL; 2588 err = -EINVAL;
2477 goto fail_trans_kthread; 2589 goto fail_qgroup;
2478 } 2590 }
2479 } 2591 }
2480 2592
@@ -2484,10 +2596,10 @@ retry_root_backup:
2484 2596
2485 fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); 2597 fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
2486 if (!fs_info->fs_root) 2598 if (!fs_info->fs_root)
2487 goto fail_trans_kthread; 2599 goto fail_qgroup;
2488 if (IS_ERR(fs_info->fs_root)) { 2600 if (IS_ERR(fs_info->fs_root)) {
2489 err = PTR_ERR(fs_info->fs_root); 2601 err = PTR_ERR(fs_info->fs_root);
2490 goto fail_trans_kthread; 2602 goto fail_qgroup;
2491 } 2603 }
2492 2604
2493 if (sb->s_flags & MS_RDONLY) 2605 if (sb->s_flags & MS_RDONLY)
@@ -2511,6 +2623,8 @@ retry_root_backup:
2511 2623
2512 return 0; 2624 return 0;
2513 2625
2626fail_qgroup:
2627 btrfs_free_qgroup_config(fs_info);
2514fail_trans_kthread: 2628fail_trans_kthread:
2515 kthread_stop(fs_info->transaction_kthread); 2629 kthread_stop(fs_info->transaction_kthread);
2516fail_cleaner: 2630fail_cleaner:
@@ -3109,6 +3223,8 @@ int close_ctree(struct btrfs_root *root)
3109 fs_info->closing = 2; 3223 fs_info->closing = 2;
3110 smp_mb(); 3224 smp_mb();
3111 3225
3226 btrfs_free_qgroup_config(root->fs_info);
3227
3112 if (fs_info->delalloc_bytes) { 3228 if (fs_info->delalloc_bytes) {
3113 printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", 3229 printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
3114 (unsigned long long)fs_info->delalloc_bytes); 3230 (unsigned long long)fs_info->delalloc_bytes);
@@ -3128,6 +3244,10 @@ int close_ctree(struct btrfs_root *root)
3128 free_extent_buffer(fs_info->dev_root->commit_root); 3244 free_extent_buffer(fs_info->dev_root->commit_root);
3129 free_extent_buffer(fs_info->csum_root->node); 3245 free_extent_buffer(fs_info->csum_root->node);
3130 free_extent_buffer(fs_info->csum_root->commit_root); 3246 free_extent_buffer(fs_info->csum_root->commit_root);
3247 if (fs_info->quota_root) {
3248 free_extent_buffer(fs_info->quota_root->node);
3249 free_extent_buffer(fs_info->quota_root->commit_root);
3250 }
3131 3251
3132 btrfs_free_block_groups(fs_info); 3252 btrfs_free_block_groups(fs_info);
3133 3253
@@ -3258,7 +3378,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
3258 return btree_read_extent_buffer_pages(root, buf, 0, parent_transid); 3378 return btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
3259} 3379}
3260 3380
3261static int btree_lock_page_hook(struct page *page, void *data, 3381int btree_lock_page_hook(struct page *page, void *data,
3262 void (*flush_fn)(void *)) 3382 void (*flush_fn)(void *))
3263{ 3383{
3264 struct inode *inode = page->mapping->host; 3384 struct inode *inode = page->mapping->host;
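One detail worth noting in the btrfs_read_fs_root_no_name() hunk above: a missing quota root is reported as ERR_PTR(-ENOENT) rather than NULL, which is why callers have to check IS_ERR() as well as NULL. For readers unfamiliar with the convention, here is a minimal user-space rendition of the kernel's ERR_PTR/PTR_ERR/IS_ERR encoding; it is a sketch mirroring include/linux/err.h, not the kernel implementation itself.

#include <stdio.h>

#define MAX_ERRNO 4095	/* as in include/linux/err.h */

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	/* error codes live in the top 4095 values of the address space */
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
	void *root = ERR_PTR(-2);	/* -ENOENT, as for a missing quota root */

	if (!root)
		printf("NULL root\n");
	else if (IS_ERR(root))
		printf("lookup failed: %ld\n", PTR_ERR(root));
	else
		printf("got a root\n");
	return 0;
}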
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 05b3fab39f7..95e147eea23 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -89,6 +89,12 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
89int btrfs_cleanup_transaction(struct btrfs_root *root); 89int btrfs_cleanup_transaction(struct btrfs_root *root);
90void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans, 90void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
91 struct btrfs_root *root); 91 struct btrfs_root *root);
92void btrfs_abort_devices(struct btrfs_root *root);
93struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
94 struct btrfs_fs_info *fs_info,
95 u64 objectid);
96int btree_lock_page_hook(struct page *page, void *data,
97 void (*flush_fn)(void *));
92 98
93#ifdef CONFIG_DEBUG_LOCK_ALLOC 99#ifdef CONFIG_DEBUG_LOCK_ALLOC
94void btrfs_init_lockdep(void); 100void btrfs_init_lockdep(void);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 71b2d1c7da6..44f06201f37 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -34,6 +34,8 @@
34#include "locking.h" 34#include "locking.h"
35#include "free-space-cache.h" 35#include "free-space-cache.h"
36 36
37#undef SCRAMBLE_DELAYED_REFS
38
37/* 39/*
38 * control flags for do_chunk_alloc's force field 40 * control flags for do_chunk_alloc's force field
39 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk 41 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
@@ -2217,6 +2219,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2217 struct btrfs_delayed_ref_node *ref; 2219 struct btrfs_delayed_ref_node *ref;
2218 struct btrfs_delayed_ref_head *locked_ref = NULL; 2220 struct btrfs_delayed_ref_head *locked_ref = NULL;
2219 struct btrfs_delayed_extent_op *extent_op; 2221 struct btrfs_delayed_extent_op *extent_op;
2222 struct btrfs_fs_info *fs_info = root->fs_info;
2220 int ret; 2223 int ret;
2221 int count = 0; 2224 int count = 0;
2222 int must_insert_reserved = 0; 2225 int must_insert_reserved = 0;
@@ -2255,7 +2258,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2255 ref = select_delayed_ref(locked_ref); 2258 ref = select_delayed_ref(locked_ref);
2256 2259
2257 if (ref && ref->seq && 2260 if (ref && ref->seq &&
2258 btrfs_check_delayed_seq(delayed_refs, ref->seq)) { 2261 btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
2259 /* 2262 /*
2260 * there are still refs with lower seq numbers in the 2263 * there are still refs with lower seq numbers in the
2261 * process of being added. Don't run this ref yet. 2264 * process of being added. Don't run this ref yet.
@@ -2337,7 +2340,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2337 } 2340 }
2338 2341
2339next: 2342next:
2340 do_chunk_alloc(trans, root->fs_info->extent_root, 2343 do_chunk_alloc(trans, fs_info->extent_root,
2341 2 * 1024 * 1024, 2344 2 * 1024 * 1024,
2342 btrfs_get_alloc_profile(root, 0), 2345 btrfs_get_alloc_profile(root, 0),
2343 CHUNK_ALLOC_NO_FORCE); 2346 CHUNK_ALLOC_NO_FORCE);
@@ -2347,21 +2350,99 @@ next:
2347 return count; 2350 return count;
2348} 2351}
2349 2352
2350static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs, 2353static void wait_for_more_refs(struct btrfs_fs_info *fs_info,
2354 struct btrfs_delayed_ref_root *delayed_refs,
2351 unsigned long num_refs, 2355 unsigned long num_refs,
2352 struct list_head *first_seq) 2356 struct list_head *first_seq)
2353{ 2357{
2354 spin_unlock(&delayed_refs->lock); 2358 spin_unlock(&delayed_refs->lock);
2355 pr_debug("waiting for more refs (num %ld, first %p)\n", 2359 pr_debug("waiting for more refs (num %ld, first %p)\n",
2356 num_refs, first_seq); 2360 num_refs, first_seq);
2357 wait_event(delayed_refs->seq_wait, 2361 wait_event(fs_info->tree_mod_seq_wait,
2358 num_refs != delayed_refs->num_entries || 2362 num_refs != delayed_refs->num_entries ||
2359 delayed_refs->seq_head.next != first_seq); 2363 fs_info->tree_mod_seq_list.next != first_seq);
2360 pr_debug("done waiting for more refs (num %ld, first %p)\n", 2364 pr_debug("done waiting for more refs (num %ld, first %p)\n",
2361 delayed_refs->num_entries, delayed_refs->seq_head.next); 2365 delayed_refs->num_entries, fs_info->tree_mod_seq_list.next);
2362 spin_lock(&delayed_refs->lock); 2366 spin_lock(&delayed_refs->lock);
2363} 2367}
2364 2368
2369#ifdef SCRAMBLE_DELAYED_REFS
2370/*
2371 * Normally delayed refs get processed in ascending bytenr order. This
2372 * correlates in most cases to the order added. To expose dependencies on this
2373 * order, we start to process the tree in the middle instead of the beginning
2374 */
2375static u64 find_middle(struct rb_root *root)
2376{
2377 struct rb_node *n = root->rb_node;
2378 struct btrfs_delayed_ref_node *entry;
2379 int alt = 1;
2380 u64 middle;
2381 u64 first = 0, last = 0;
2382
2383 n = rb_first(root);
2384 if (n) {
2385 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2386 first = entry->bytenr;
2387 }
2388 n = rb_last(root);
2389 if (n) {
2390 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2391 last = entry->bytenr;
2392 }
2393 n = root->rb_node;
2394
2395 while (n) {
2396 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2397 WARN_ON(!entry->in_tree);
2398
2399 middle = entry->bytenr;
2400
2401 if (alt)
2402 n = n->rb_left;
2403 else
2404 n = n->rb_right;
2405
2406 alt = 1 - alt;
2407 }
2408 return middle;
2409}
2410#endif
2411
2412int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
2413 struct btrfs_fs_info *fs_info)
2414{
2415 struct qgroup_update *qgroup_update;
2416 int ret = 0;
2417
2418 if (list_empty(&trans->qgroup_ref_list) !=
2419 !trans->delayed_ref_elem.seq) {
2420 /* list without seq or seq without list */
2421 printk(KERN_ERR "btrfs: qgroup accounting update error, list is%s empty, seq is %llu\n",
2422 list_empty(&trans->qgroup_ref_list) ? "" : " not",
2423 trans->delayed_ref_elem.seq);
2424 BUG();
2425 }
2426
2427 if (!trans->delayed_ref_elem.seq)
2428 return 0;
2429
2430 while (!list_empty(&trans->qgroup_ref_list)) {
2431 qgroup_update = list_first_entry(&trans->qgroup_ref_list,
2432 struct qgroup_update, list);
2433 list_del(&qgroup_update->list);
2434 if (!ret)
2435 ret = btrfs_qgroup_account_ref(
2436 trans, fs_info, qgroup_update->node,
2437 qgroup_update->extent_op);
2438 kfree(qgroup_update);
2439 }
2440
2441 btrfs_put_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
2442
2443 return ret;
2444}
2445
2365/* 2446/*
2366 * this starts processing the delayed reference count updates and 2447 * this starts processing the delayed reference count updates and
2367 * extent insertions we have queued up so far. count can be 2448 * extent insertions we have queued up so far. count can be
@@ -2398,11 +2479,18 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2398 2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0), 2479 2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0),
2399 CHUNK_ALLOC_NO_FORCE); 2480 CHUNK_ALLOC_NO_FORCE);
2400 2481
2482 btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
2483
2401 delayed_refs = &trans->transaction->delayed_refs; 2484 delayed_refs = &trans->transaction->delayed_refs;
2402 INIT_LIST_HEAD(&cluster); 2485 INIT_LIST_HEAD(&cluster);
2403again: 2486again:
2404 consider_waiting = 0; 2487 consider_waiting = 0;
2405 spin_lock(&delayed_refs->lock); 2488 spin_lock(&delayed_refs->lock);
2489
2490#ifdef SCRAMBLE_DELAYED_REFS
2491 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
2492#endif
2493
2406 if (count == 0) { 2494 if (count == 0) {
2407 count = delayed_refs->num_entries * 2; 2495 count = delayed_refs->num_entries * 2;
2408 run_most = 1; 2496 run_most = 1;
@@ -2437,7 +2525,7 @@ again:
2437 num_refs = delayed_refs->num_entries; 2525 num_refs = delayed_refs->num_entries;
2438 first_seq = root->fs_info->tree_mod_seq_list.next; 2526 first_seq = root->fs_info->tree_mod_seq_list.next;
2439 } else { 2527 } else {
2440 wait_for_more_refs(delayed_refs, 2528 wait_for_more_refs(root->fs_info, delayed_refs,
2441 num_refs, first_seq); 2529 num_refs, first_seq);
2442 /* 2530 /*
2443 * after waiting, things have changed. we 2531 * after waiting, things have changed. we
@@ -2502,6 +2590,7 @@ again:
2502 } 2590 }
2503out: 2591out:
2504 spin_unlock(&delayed_refs->lock); 2592 spin_unlock(&delayed_refs->lock);
2593 assert_qgroups_uptodate(trans);
2505 return 0; 2594 return 0;
2506} 2595}
2507 2596
@@ -4479,6 +4568,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4479 csum_bytes = BTRFS_I(inode)->csum_bytes; 4568 csum_bytes = BTRFS_I(inode)->csum_bytes;
4480 spin_unlock(&BTRFS_I(inode)->lock); 4569 spin_unlock(&BTRFS_I(inode)->lock);
4481 4570
4571 if (root->fs_info->quota_enabled) {
4572 ret = btrfs_qgroup_reserve(root, num_bytes +
4573 nr_extents * root->leafsize);
4574 if (ret)
4575 return ret;
4576 }
4577
4482 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); 4578 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
4483 if (ret) { 4579 if (ret) {
4484 u64 to_free = 0; 4580 u64 to_free = 0;
@@ -4557,6 +4653,11 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
4557 4653
4558 trace_btrfs_space_reservation(root->fs_info, "delalloc", 4654 trace_btrfs_space_reservation(root->fs_info, "delalloc",
4559 btrfs_ino(inode), to_free, 0); 4655 btrfs_ino(inode), to_free, 0);
4656 if (root->fs_info->quota_enabled) {
4657 btrfs_qgroup_free(root, num_bytes +
4658 dropped * root->leafsize);
4659 }
4660
4560 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, 4661 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
4561 to_free); 4662 to_free);
4562} 4663}
@@ -5193,8 +5294,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
5193 rb_erase(&head->node.rb_node, &delayed_refs->root); 5294 rb_erase(&head->node.rb_node, &delayed_refs->root);
5194 5295
5195 delayed_refs->num_entries--; 5296 delayed_refs->num_entries--;
5196 if (waitqueue_active(&delayed_refs->seq_wait)) 5297 if (waitqueue_active(&root->fs_info->tree_mod_seq_wait))
5197 wake_up(&delayed_refs->seq_wait); 5298 wake_up(&root->fs_info->tree_mod_seq_wait);
5198 5299
5199 /* 5300 /*
5200 * we don't take a ref on the node because we're removing it from the 5301 * we don't take a ref on the node because we're removing it from the
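btrfs_delayed_refs_qgroup_accounting() in the hunk above drains trans->qgroup_ref_list with a pattern that is easy to get wrong: the loop must keep walking after a failure so every queued entry is freed, while reporting only the first error. A stand-alone toy version of that drain, under the assumption that a negative value stands for a failed accounting call:

#include <stdio.h>
#include <stdlib.h>

struct update {
	int value;			/* pretend negative values fail to account */
	struct update *next;
};

static int account_one(const struct update *u)
{
	return u->value < 0 ? -1 : 0;
}

static int drain(struct update *head)
{
	int ret = 0;

	while (head) {
		struct update *u = head;

		head = head->next;
		if (!ret)		/* keep only the first error */
			ret = account_one(u);
		free(u);		/* freed unconditionally, like the kfree() above */
	}
	return ret;
}

int main(void)
{
	int vals[] = { 3, -2, 1 };	/* one failing entry in the middle */
	struct update *head = NULL;

	for (int i = 2; i >= 0; i--) {
		struct update *u = malloc(sizeof(*u));

		if (!u)
			return 1;
		u->value = vals[i];
		u->next = head;
		head = u;
	}
	printf("drain returned %d\n", drain(head));	/* prints -1 */
	return 0;
}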
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 17facea6a51..e54b663fd3a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -336,7 +336,8 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
336static noinline int create_subvol(struct btrfs_root *root, 336static noinline int create_subvol(struct btrfs_root *root,
337 struct dentry *dentry, 337 struct dentry *dentry,
338 char *name, int namelen, 338 char *name, int namelen,
339 u64 *async_transid) 339 u64 *async_transid,
340 struct btrfs_qgroup_inherit **inherit)
340{ 341{
341 struct btrfs_trans_handle *trans; 342 struct btrfs_trans_handle *trans;
342 struct btrfs_key key; 343 struct btrfs_key key;
@@ -368,6 +369,11 @@ static noinline int create_subvol(struct btrfs_root *root,
368 if (IS_ERR(trans)) 369 if (IS_ERR(trans))
369 return PTR_ERR(trans); 370 return PTR_ERR(trans);
370 371
372 ret = btrfs_qgroup_inherit(trans, root->fs_info, 0, objectid,
373 inherit ? *inherit : NULL);
374 if (ret)
375 goto fail;
376
371 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 377 leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
372 0, objectid, NULL, 0, 0, 0); 378 0, objectid, NULL, 0, 0, 0);
373 if (IS_ERR(leaf)) { 379 if (IS_ERR(leaf)) {
@@ -484,7 +490,7 @@ fail:
484 490
485static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, 491static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
486 char *name, int namelen, u64 *async_transid, 492 char *name, int namelen, u64 *async_transid,
487 bool readonly) 493 bool readonly, struct btrfs_qgroup_inherit **inherit)
488{ 494{
489 struct inode *inode; 495 struct inode *inode;
490 struct btrfs_pending_snapshot *pending_snapshot; 496 struct btrfs_pending_snapshot *pending_snapshot;
@@ -502,6 +508,10 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
502 pending_snapshot->dentry = dentry; 508 pending_snapshot->dentry = dentry;
503 pending_snapshot->root = root; 509 pending_snapshot->root = root;
504 pending_snapshot->readonly = readonly; 510 pending_snapshot->readonly = readonly;
511 if (inherit) {
512 pending_snapshot->inherit = *inherit;
513 *inherit = NULL; /* take responsibility to free it */
514 }
505 515
506 trans = btrfs_start_transaction(root->fs_info->extent_root, 5); 516 trans = btrfs_start_transaction(root->fs_info->extent_root, 5);
507 if (IS_ERR(trans)) { 517 if (IS_ERR(trans)) {
@@ -635,7 +645,8 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
635static noinline int btrfs_mksubvol(struct path *parent, 645static noinline int btrfs_mksubvol(struct path *parent,
636 char *name, int namelen, 646 char *name, int namelen,
637 struct btrfs_root *snap_src, 647 struct btrfs_root *snap_src,
638 u64 *async_transid, bool readonly) 648 u64 *async_transid, bool readonly,
649 struct btrfs_qgroup_inherit **inherit)
639{ 650{
640 struct inode *dir = parent->dentry->d_inode; 651 struct inode *dir = parent->dentry->d_inode;
641 struct dentry *dentry; 652 struct dentry *dentry;
@@ -662,11 +673,11 @@ static noinline int btrfs_mksubvol(struct path *parent,
662 goto out_up_read; 673 goto out_up_read;
663 674
664 if (snap_src) { 675 if (snap_src) {
665 error = create_snapshot(snap_src, dentry, 676 error = create_snapshot(snap_src, dentry, name, namelen,
666 name, namelen, async_transid, readonly); 677 async_transid, readonly, inherit);
667 } else { 678 } else {
668 error = create_subvol(BTRFS_I(dir)->root, dentry, 679 error = create_subvol(BTRFS_I(dir)->root, dentry,
669 name, namelen, async_transid); 680 name, namelen, async_transid, inherit);
670 } 681 }
671 if (!error) 682 if (!error)
672 fsnotify_mkdir(dir, dentry); 683 fsnotify_mkdir(dir, dentry);
@@ -1375,11 +1386,9 @@ out:
1375} 1386}
1376 1387
1377static noinline int btrfs_ioctl_snap_create_transid(struct file *file, 1388static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
1378 char *name, 1389 char *name, unsigned long fd, int subvol,
1379 unsigned long fd, 1390 u64 *transid, bool readonly,
1380 int subvol, 1391 struct btrfs_qgroup_inherit **inherit)
1381 u64 *transid,
1382 bool readonly)
1383{ 1392{
1384 struct file *src_file; 1393 struct file *src_file;
1385 int namelen; 1394 int namelen;
@@ -1403,7 +1412,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
1403 1412
1404 if (subvol) { 1413 if (subvol) {
1405 ret = btrfs_mksubvol(&file->f_path, name, namelen, 1414 ret = btrfs_mksubvol(&file->f_path, name, namelen,
1406 NULL, transid, readonly); 1415 NULL, transid, readonly, inherit);
1407 } else { 1416 } else {
1408 struct inode *src_inode; 1417 struct inode *src_inode;
1409 src_file = fget(fd); 1418 src_file = fget(fd);
@@ -1422,7 +1431,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
1422 } 1431 }
1423 ret = btrfs_mksubvol(&file->f_path, name, namelen, 1432 ret = btrfs_mksubvol(&file->f_path, name, namelen,
1424 BTRFS_I(src_inode)->root, 1433 BTRFS_I(src_inode)->root,
1425 transid, readonly); 1434 transid, readonly, inherit);
1426 fput(src_file); 1435 fput(src_file);
1427 } 1436 }
1428out_drop_write: 1437out_drop_write:
@@ -1444,7 +1453,7 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
1444 1453
1445 ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, 1454 ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
1446 vol_args->fd, subvol, 1455 vol_args->fd, subvol,
1447 NULL, false); 1456 NULL, false, NULL);
1448 1457
1449 kfree(vol_args); 1458 kfree(vol_args);
1450 return ret; 1459 return ret;
@@ -1458,6 +1467,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
1458 u64 transid = 0; 1467 u64 transid = 0;
1459 u64 *ptr = NULL; 1468 u64 *ptr = NULL;
1460 bool readonly = false; 1469 bool readonly = false;
1470 struct btrfs_qgroup_inherit *inherit = NULL;
1461 1471
1462 vol_args = memdup_user(arg, sizeof(*vol_args)); 1472 vol_args = memdup_user(arg, sizeof(*vol_args));
1463 if (IS_ERR(vol_args)) 1473 if (IS_ERR(vol_args))
@@ -1465,7 +1475,8 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
1465 vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; 1475 vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
1466 1476
1467 if (vol_args->flags & 1477 if (vol_args->flags &
1468 ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY)) { 1478 ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY |
1479 BTRFS_SUBVOL_QGROUP_INHERIT)) {
1469 ret = -EOPNOTSUPP; 1480 ret = -EOPNOTSUPP;
1470 goto out; 1481 goto out;
1471 } 1482 }
@@ -1474,10 +1485,21 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
1474 ptr = &transid; 1485 ptr = &transid;
1475 if (vol_args->flags & BTRFS_SUBVOL_RDONLY) 1486 if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
1476 readonly = true; 1487 readonly = true;
1488 if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
1489 if (vol_args->size > PAGE_CACHE_SIZE) {
1490 ret = -EINVAL;
1491 goto out;
1492 }
1493 inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size);
1494 if (IS_ERR(inherit)) {
1495 ret = PTR_ERR(inherit);
1496 goto out;
1497 }
1498 }
1477 1499
1478 ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, 1500 ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
1479 vol_args->fd, subvol, 1501 vol_args->fd, subvol, ptr,
1480 ptr, readonly); 1502 readonly, &inherit);
1481 1503
1482 if (ret == 0 && ptr && 1504 if (ret == 0 && ptr &&
1483 copy_to_user(arg + 1505 copy_to_user(arg +
@@ -1486,6 +1508,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
1486 ret = -EFAULT; 1508 ret = -EFAULT;
1487out: 1509out:
1488 kfree(vol_args); 1510 kfree(vol_args);
1511 kfree(inherit);
1489 return ret; 1512 return ret;
1490} 1513}
1491 1514
@@ -3401,6 +3424,183 @@ out:
3401 return ret; 3424 return ret;
3402} 3425}
3403 3426
3427static long btrfs_ioctl_quota_ctl(struct btrfs_root *root, void __user *arg)
3428{
3429 struct btrfs_ioctl_quota_ctl_args *sa;
3430 struct btrfs_trans_handle *trans = NULL;
3431 int ret;
3432 int err;
3433
3434 if (!capable(CAP_SYS_ADMIN))
3435 return -EPERM;
3436
3437 if (root->fs_info->sb->s_flags & MS_RDONLY)
3438 return -EROFS;
3439
3440 sa = memdup_user(arg, sizeof(*sa));
3441 if (IS_ERR(sa))
3442 return PTR_ERR(sa);
3443
3444 if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) {
3445 trans = btrfs_start_transaction(root, 2);
3446 if (IS_ERR(trans)) {
3447 ret = PTR_ERR(trans);
3448 goto out;
3449 }
3450 }
3451
3452 switch (sa->cmd) {
3453 case BTRFS_QUOTA_CTL_ENABLE:
3454 ret = btrfs_quota_enable(trans, root->fs_info);
3455 break;
3456 case BTRFS_QUOTA_CTL_DISABLE:
3457 ret = btrfs_quota_disable(trans, root->fs_info);
3458 break;
3459 case BTRFS_QUOTA_CTL_RESCAN:
3460 ret = btrfs_quota_rescan(root->fs_info);
3461 break;
3462 default:
3463 ret = -EINVAL;
3464 break;
3465 }
3466
3467 if (copy_to_user(arg, sa, sizeof(*sa)))
3468 ret = -EFAULT;
3469
3470 if (trans) {
3471 err = btrfs_commit_transaction(trans, root);
3472 if (err && !ret)
3473 ret = err;
3474 }
3475
3476out:
3477 kfree(sa);
3478 return ret;
3479}
3480
3481static long btrfs_ioctl_qgroup_assign(struct btrfs_root *root, void __user *arg)
3482{
3483 struct btrfs_ioctl_qgroup_assign_args *sa;
3484 struct btrfs_trans_handle *trans;
3485 int ret;
3486 int err;
3487
3488 if (!capable(CAP_SYS_ADMIN))
3489 return -EPERM;
3490
3491 if (root->fs_info->sb->s_flags & MS_RDONLY)
3492 return -EROFS;
3493
3494 sa = memdup_user(arg, sizeof(*sa));
3495 if (IS_ERR(sa))
3496 return PTR_ERR(sa);
3497
3498 trans = btrfs_join_transaction(root);
3499 if (IS_ERR(trans)) {
3500 ret = PTR_ERR(trans);
3501 goto out;
3502 }
3503
3504 /* FIXME: check if the IDs really exist */
3505 if (sa->assign) {
3506 ret = btrfs_add_qgroup_relation(trans, root->fs_info,
3507 sa->src, sa->dst);
3508 } else {
3509 ret = btrfs_del_qgroup_relation(trans, root->fs_info,
3510 sa->src, sa->dst);
3511 }
3512
3513 err = btrfs_end_transaction(trans, root);
3514 if (err && !ret)
3515 ret = err;
3516
3517out:
3518 kfree(sa);
3519 return ret;
3520}
3521
3522static long btrfs_ioctl_qgroup_create(struct btrfs_root *root, void __user *arg)
3523{
3524 struct btrfs_ioctl_qgroup_create_args *sa;
3525 struct btrfs_trans_handle *trans;
3526 int ret;
3527 int err;
3528
3529 if (!capable(CAP_SYS_ADMIN))
3530 return -EPERM;
3531
3532 if (root->fs_info->sb->s_flags & MS_RDONLY)
3533 return -EROFS;
3534
3535 sa = memdup_user(arg, sizeof(*sa));
3536 if (IS_ERR(sa))
3537 return PTR_ERR(sa);
3538
3539 trans = btrfs_join_transaction(root);
3540 if (IS_ERR(trans)) {
3541 ret = PTR_ERR(trans);
3542 goto out;
3543 }
3544
3545 /* FIXME: check if the IDs really exist */
3546 if (sa->create) {
3547 ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid,
3548 NULL);
3549 } else {
3550 ret = btrfs_remove_qgroup(trans, root->fs_info, sa->qgroupid);
3551 }
3552
3553 err = btrfs_end_transaction(trans, root);
3554 if (err && !ret)
3555 ret = err;
3556
3557out:
3558 kfree(sa);
3559 return ret;
3560}
3561
3562static long btrfs_ioctl_qgroup_limit(struct btrfs_root *root, void __user *arg)
3563{
3564 struct btrfs_ioctl_qgroup_limit_args *sa;
3565 struct btrfs_trans_handle *trans;
3566 int ret;
3567 int err;
3568 u64 qgroupid;
3569
3570 if (!capable(CAP_SYS_ADMIN))
3571 return -EPERM;
3572
3573 if (root->fs_info->sb->s_flags & MS_RDONLY)
3574 return -EROFS;
3575
3576 sa = memdup_user(arg, sizeof(*sa));
3577 if (IS_ERR(sa))
3578 return PTR_ERR(sa);
3579
3580 trans = btrfs_join_transaction(root);
3581 if (IS_ERR(trans)) {
3582 ret = PTR_ERR(trans);
3583 goto out;
3584 }
3585
3586 qgroupid = sa->qgroupid;
3587 if (!qgroupid) {
3588 /* take the current subvol as qgroup */
3589 qgroupid = root->root_key.objectid;
3590 }
3591
3592 /* FIXME: check if the IDs really exist */
3593 ret = btrfs_limit_qgroup(trans, root->fs_info, qgroupid, &sa->lim);
3594
3595 err = btrfs_end_transaction(trans, root);
3596 if (err && !ret)
3597 ret = err;
3598
3599out:
3600 kfree(sa);
3601 return ret;
3602}
3603
3404long btrfs_ioctl(struct file *file, unsigned int 3604long btrfs_ioctl(struct file *file, unsigned int
3405 cmd, unsigned long arg) 3605 cmd, unsigned long arg)
3406{ 3606{
@@ -3422,6 +3622,8 @@ long btrfs_ioctl(struct file *file, unsigned int
3422 return btrfs_ioctl_snap_create_v2(file, argp, 0); 3622 return btrfs_ioctl_snap_create_v2(file, argp, 0);
3423 case BTRFS_IOC_SUBVOL_CREATE: 3623 case BTRFS_IOC_SUBVOL_CREATE:
3424 return btrfs_ioctl_snap_create(file, argp, 1); 3624 return btrfs_ioctl_snap_create(file, argp, 1);
3625 case BTRFS_IOC_SUBVOL_CREATE_V2:
3626 return btrfs_ioctl_snap_create_v2(file, argp, 1);
3425 case BTRFS_IOC_SNAP_DESTROY: 3627 case BTRFS_IOC_SNAP_DESTROY:
3426 return btrfs_ioctl_snap_destroy(file, argp); 3628 return btrfs_ioctl_snap_destroy(file, argp);
3427 case BTRFS_IOC_SUBVOL_GETFLAGS: 3629 case BTRFS_IOC_SUBVOL_GETFLAGS:
@@ -3485,6 +3687,14 @@ long btrfs_ioctl(struct file *file, unsigned int
3485 return btrfs_ioctl_balance_progress(root, argp); 3687 return btrfs_ioctl_balance_progress(root, argp);
3486 case BTRFS_IOC_GET_DEV_STATS: 3688 case BTRFS_IOC_GET_DEV_STATS:
3487 return btrfs_ioctl_get_dev_stats(root, argp); 3689 return btrfs_ioctl_get_dev_stats(root, argp);
3690 case BTRFS_IOC_QUOTA_CTL:
3691 return btrfs_ioctl_quota_ctl(root, argp);
3692 case BTRFS_IOC_QGROUP_ASSIGN:
3693 return btrfs_ioctl_qgroup_assign(root, argp);
3694 case BTRFS_IOC_QGROUP_CREATE:
3695 return btrfs_ioctl_qgroup_create(root, argp);
3696 case BTRFS_IOC_QGROUP_LIMIT:
3697 return btrfs_ioctl_qgroup_limit(root, argp);
3488 } 3698 }
3489 3699
3490 return -ENOTTY; 3700 return -ENOTTY;
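From user space, the four new qgroup ioctls follow the usual pattern: fill an argument struct, open any file on the filesystem, issue the ioctl. Below is a hedged sketch creating a level-1 qgroup via BTRFS_IOC_QGROUP_CREATE, using the argument struct and ioctl number added in the ioctl.h hunk that follows. The definitions are copied inline (this predates a separate uapi header; BTRFS_IOCTL_MAGIC is 0x94), the qgroup id is just an example, quotas must already be enabled, and CAP_SYS_ADMIN is required.

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/types.h>

#define BTRFS_IOCTL_MAGIC 0x94

struct btrfs_ioctl_qgroup_create_args {
	__u64 create;
	__u64 qgroupid;
};

#define BTRFS_IOC_QGROUP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 42, \
				struct btrfs_ioctl_qgroup_create_args)

int main(int argc, char **argv)
{
	struct btrfs_ioctl_qgroup_create_args args = {
		.create = 1,
		/* example id: level 1, qgroup 100 (level lives in bits 48..63) */
		.qgroupid = (1ULL << 48) | 100,
	};
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <path on btrfs>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (ioctl(fd, BTRFS_IOC_QGROUP_CREATE, &args) < 0)
		perror("BTRFS_IOC_QGROUP_CREATE");
	close(fd);
	return 0;
}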
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 4e3e5d342a2..3f9701d571e 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -32,15 +32,46 @@ struct btrfs_ioctl_vol_args {
32 32
33#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) 33#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
34#define BTRFS_SUBVOL_RDONLY (1ULL << 1) 34#define BTRFS_SUBVOL_RDONLY (1ULL << 1)
35#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2)
35#define BTRFS_FSID_SIZE 16 36#define BTRFS_FSID_SIZE 16
36#define BTRFS_UUID_SIZE 16 37#define BTRFS_UUID_SIZE 16
37 38
39#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0)
40
41struct btrfs_qgroup_limit {
42 __u64 flags;
43 __u64 max_rfer;
44 __u64 max_excl;
45 __u64 rsv_rfer;
46 __u64 rsv_excl;
47};
48
49struct btrfs_qgroup_inherit {
50 __u64 flags;
51 __u64 num_qgroups;
52 __u64 num_ref_copies;
53 __u64 num_excl_copies;
54 struct btrfs_qgroup_limit lim;
55 __u64 qgroups[0];
56};
57
58struct btrfs_ioctl_qgroup_limit_args {
59 __u64 qgroupid;
60 struct btrfs_qgroup_limit lim;
61};
62
38#define BTRFS_SUBVOL_NAME_MAX 4039 63#define BTRFS_SUBVOL_NAME_MAX 4039
39struct btrfs_ioctl_vol_args_v2 { 64struct btrfs_ioctl_vol_args_v2 {
40 __s64 fd; 65 __s64 fd;
41 __u64 transid; 66 __u64 transid;
42 __u64 flags; 67 __u64 flags;
43 __u64 unused[4]; 68 union {
69 struct {
70 __u64 size;
71 struct btrfs_qgroup_inherit __user *qgroup_inherit;
72 };
73 __u64 unused[4];
74 };
44 char name[BTRFS_SUBVOL_NAME_MAX + 1]; 75 char name[BTRFS_SUBVOL_NAME_MAX + 1];
45}; 76};
46 77
@@ -299,6 +330,25 @@ struct btrfs_ioctl_get_dev_stats {
299 __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */ 330 __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */
300}; 331};
301 332
333#define BTRFS_QUOTA_CTL_ENABLE 1
334#define BTRFS_QUOTA_CTL_DISABLE 2
335#define BTRFS_QUOTA_CTL_RESCAN 3
336struct btrfs_ioctl_quota_ctl_args {
337 __u64 cmd;
338 __u64 status;
339};
340
341struct btrfs_ioctl_qgroup_assign_args {
342 __u64 assign;
343 __u64 src;
344 __u64 dst;
345};
346
347struct btrfs_ioctl_qgroup_create_args {
348 __u64 create;
349 __u64 qgroupid;
350};
351
302#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ 352#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
303 struct btrfs_ioctl_vol_args) 353 struct btrfs_ioctl_vol_args)
304#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ 354#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
@@ -343,6 +393,8 @@ struct btrfs_ioctl_get_dev_stats {
343#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) 393#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
344#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ 394#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
345 struct btrfs_ioctl_vol_args_v2) 395 struct btrfs_ioctl_vol_args_v2)
396#define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \
397 struct btrfs_ioctl_vol_args_v2)
346#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64) 398#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64)
347#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) 399#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
348#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ 400#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \
@@ -365,6 +417,14 @@ struct btrfs_ioctl_get_dev_stats {
365 struct btrfs_ioctl_ino_path_args) 417 struct btrfs_ioctl_ino_path_args)
366#define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \ 418#define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \
367 struct btrfs_ioctl_vol_args) 419 struct btrfs_ioctl_vol_args)
420#define BTRFS_IOC_QUOTA_CTL _IOWR(BTRFS_IOCTL_MAGIC, 40, \
421 struct btrfs_ioctl_quota_ctl_args)
422#define BTRFS_IOC_QGROUP_ASSIGN _IOW(BTRFS_IOCTL_MAGIC, 41, \
423 struct btrfs_ioctl_qgroup_assign_args)
424#define BTRFS_IOC_QGROUP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 42, \
425 struct btrfs_ioctl_qgroup_create_args)
426#define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \
427 struct btrfs_ioctl_qgroup_limit_args)
368#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ 428#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \
369 struct btrfs_ioctl_get_dev_stats) 429 struct btrfs_ioctl_get_dev_stats)
370#endif 430#endif
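Enabling quotas in the first place goes through BTRFS_IOC_QUOTA_CTL with BTRFS_QUOTA_CTL_ENABLE, which lands in btrfs_quota_enable() shown in the qgroup.c listing below. A minimal sketch along the same lines as the previous example, with the definitions from the hunk above copied inline and the mount point passed as an argument:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/types.h>

#define BTRFS_IOCTL_MAGIC	0x94
#define BTRFS_QUOTA_CTL_ENABLE	1

struct btrfs_ioctl_quota_ctl_args {
	__u64 cmd;
	__u64 status;
};

#define BTRFS_IOC_QUOTA_CTL _IOWR(BTRFS_IOCTL_MAGIC, 40, \
				struct btrfs_ioctl_quota_ctl_args)

int main(int argc, char **argv)
{
	struct btrfs_ioctl_quota_ctl_args args = { .cmd = BTRFS_QUOTA_CTL_ENABLE };
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <btrfs mount point>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* needs CAP_SYS_ADMIN; a read-only fs returns -EROFS */
	if (ioctl(fd, BTRFS_IOC_QUOTA_CTL, &args) < 0)
		perror("BTRFS_IOC_QUOTA_CTL");
	close(fd);
	return 0;
}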
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
new file mode 100644
index 00000000000..bc424ae5a81
--- /dev/null
+++ b/fs/btrfs/qgroup.c
@@ -0,0 +1,1571 @@
1/*
2 * Copyright (C) 2011 STRATO. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include <linux/pagemap.h>
21#include <linux/writeback.h>
22#include <linux/blkdev.h>
23#include <linux/rbtree.h>
24#include <linux/slab.h>
25#include <linux/workqueue.h>
26
27#include "ctree.h"
28#include "transaction.h"
29#include "disk-io.h"
30#include "locking.h"
31#include "ulist.h"
32#include "ioctl.h"
33#include "backref.h"
34
35/* TODO XXX FIXME
36 * - subvol delete -> delete when ref goes to 0? delete limits also?
37 * - reorganize keys
38 * - compressed
39 * - sync
40 * - rescan
41 * - copy also limits on subvol creation
42 * - limit
43 * - caches for ulists
44 * - performance benchmarks
45 * - check all ioctl parameters
46 */
47
48/*
49 * one struct for each qgroup, organized in fs_info->qgroup_tree.
50 */
51struct btrfs_qgroup {
52 u64 qgroupid;
53
54 /*
55 * state
56 */
57 u64 rfer; /* referenced */
58 u64 rfer_cmpr; /* referenced compressed */
59 u64 excl; /* exclusive */
60 u64 excl_cmpr; /* exclusive compressed */
61
62 /*
63 * limits
64 */
65 u64 lim_flags; /* which limits are set */
66 u64 max_rfer;
67 u64 max_excl;
68 u64 rsv_rfer;
69 u64 rsv_excl;
70
71 /*
72 * reservation tracking
73 */
74 u64 reserved;
75
76 /*
77 * lists
78 */
79 struct list_head groups; /* groups this group is member of */
80 struct list_head members; /* groups that are members of this group */
81 struct list_head dirty; /* dirty groups */
82 struct rb_node node; /* tree of qgroups */
83
84 /*
85 * temp variables for accounting operations
86 */
87 u64 tag;
88 u64 refcnt;
89};
90
91/*
92 * glue structure to represent the relations between qgroups.
93 */
94struct btrfs_qgroup_list {
95 struct list_head next_group;
96 struct list_head next_member;
97 struct btrfs_qgroup *group;
98 struct btrfs_qgroup *member;
99};
100
101/* must be called with qgroup_lock held */
102static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
103 u64 qgroupid)
104{
105 struct rb_node *n = fs_info->qgroup_tree.rb_node;
106 struct btrfs_qgroup *qgroup;
107
108 while (n) {
109 qgroup = rb_entry(n, struct btrfs_qgroup, node);
110 if (qgroup->qgroupid < qgroupid)
111 n = n->rb_left;
112 else if (qgroup->qgroupid > qgroupid)
113 n = n->rb_right;
114 else
115 return qgroup;
116 }
117 return NULL;
118}
119
120/* must be called with qgroup_lock held */
121static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
122 u64 qgroupid)
123{
124 struct rb_node **p = &fs_info->qgroup_tree.rb_node;
125 struct rb_node *parent = NULL;
126 struct btrfs_qgroup *qgroup;
127
128 while (*p) {
129 parent = *p;
130 qgroup = rb_entry(parent, struct btrfs_qgroup, node);
131
132 if (qgroup->qgroupid < qgroupid)
133 p = &(*p)->rb_left;
134 else if (qgroup->qgroupid > qgroupid)
135 p = &(*p)->rb_right;
136 else
137 return qgroup;
138 }
139
140 qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
141 if (!qgroup)
142 return ERR_PTR(-ENOMEM);
143
144 qgroup->qgroupid = qgroupid;
145 INIT_LIST_HEAD(&qgroup->groups);
146 INIT_LIST_HEAD(&qgroup->members);
147 INIT_LIST_HEAD(&qgroup->dirty);
148
149 rb_link_node(&qgroup->node, parent, p);
150 rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
151
152 return qgroup;
153}
154
155/* must be called with qgroup_lock held */
156static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
157{
158 struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
159 struct btrfs_qgroup_list *list;
160
161 if (!qgroup)
162 return -ENOENT;
163
164 rb_erase(&qgroup->node, &fs_info->qgroup_tree);
165 list_del(&qgroup->dirty);
166
167 while (!list_empty(&qgroup->groups)) {
168 list = list_first_entry(&qgroup->groups,
169 struct btrfs_qgroup_list, next_group);
170 list_del(&list->next_group);
171 list_del(&list->next_member);
172 kfree(list);
173 }
174
175 while (!list_empty(&qgroup->members)) {
176 list = list_first_entry(&qgroup->members,
177 struct btrfs_qgroup_list, next_member);
178 list_del(&list->next_group);
179 list_del(&list->next_member);
180 kfree(list);
181 }
182 kfree(qgroup);
183
184 return 0;
185}
186
187/* must be called with qgroup_lock held */
188static int add_relation_rb(struct btrfs_fs_info *fs_info,
189 u64 memberid, u64 parentid)
190{
191 struct btrfs_qgroup *member;
192 struct btrfs_qgroup *parent;
193 struct btrfs_qgroup_list *list;
194
195 member = find_qgroup_rb(fs_info, memberid);
196 parent = find_qgroup_rb(fs_info, parentid);
197 if (!member || !parent)
198 return -ENOENT;
199
200 list = kzalloc(sizeof(*list), GFP_ATOMIC);
201 if (!list)
202 return -ENOMEM;
203
204 list->group = parent;
205 list->member = member;
206 list_add_tail(&list->next_group, &member->groups);
207 list_add_tail(&list->next_member, &parent->members);
208
209 return 0;
210}
211
212/* must be called with qgroup_lock held */
213static int del_relation_rb(struct btrfs_fs_info *fs_info,
214 u64 memberid, u64 parentid)
215{
216 struct btrfs_qgroup *member;
217 struct btrfs_qgroup *parent;
218 struct btrfs_qgroup_list *list;
219
220 member = find_qgroup_rb(fs_info, memberid);
221 parent = find_qgroup_rb(fs_info, parentid);
222 if (!member || !parent)
223 return -ENOENT;
224
225 list_for_each_entry(list, &member->groups, next_group) {
226 if (list->group == parent) {
227 list_del(&list->next_group);
228 list_del(&list->next_member);
229 kfree(list);
230 return 0;
231 }
232 }
233 return -ENOENT;
234}
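The add_relation_rb()/del_relation_rb() pair above relies on the btrfs_qgroup_list glue node: one allocation that sits on two intrusive lists at once, so a relation can be found and unlinked from either the member or the parent side. Below is a user-space sketch of the same idea, with tiny hand-rolled list primitives standing in for <linux/list.h> and arbitrary example ids:

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>

struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }
static void list_add_tail(struct list_head *n, struct list_head *h)
{
	n->prev = h->prev; n->next = h;
	h->prev->next = n; h->prev = n;
}
static void list_del(struct list_head *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

struct qgroup {
	unsigned long long id;
	struct list_head groups;	/* relations where this is the member */
	struct list_head members;	/* relations where this is the parent */
};

struct relation {
	struct list_head next_group;	/* linked into member->groups */
	struct list_head next_member;	/* linked into parent->members */
	struct qgroup *group, *member;
};

int main(void)
{
	struct qgroup subvol = { .id = 256 };
	struct qgroup parent = { .id = (1ULL << 48) | 1 };	/* level-1 group */
	struct relation *rel = calloc(1, sizeof(*rel));

	if (!rel)
		return 1;
	INIT_LIST_HEAD(&subvol.groups);  INIT_LIST_HEAD(&subvol.members);
	INIT_LIST_HEAD(&parent.groups);  INIT_LIST_HEAD(&parent.members);

	/* add_relation_rb(): one allocation, linked into both lists */
	rel->group = &parent;
	rel->member = &subvol;
	list_add_tail(&rel->next_group, &subvol.groups);
	list_add_tail(&rel->next_member, &parent.members);

	/* del_relation_rb(): found via one list, unlinked from both */
	struct relation *found = (struct relation *)
		((char *)subvol.groups.next - offsetof(struct relation, next_group));
	list_del(&found->next_group);
	list_del(&found->next_member);
	free(found);

	printf("relation %llu -> %llu added and removed\n", subvol.id, parent.id);
	return 0;
}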
235
236/*
237 * The full config is read in one go, only called from open_ctree()
238 * It doesn't use any locking, as at this point we're still single-threaded
239 */
240int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
241{
242 struct btrfs_key key;
243 struct btrfs_key found_key;
244 struct btrfs_root *quota_root = fs_info->quota_root;
245 struct btrfs_path *path = NULL;
246 struct extent_buffer *l;
247 int slot;
248 int ret = 0;
249 u64 flags = 0;
250
251 if (!fs_info->quota_enabled)
252 return 0;
253
254 path = btrfs_alloc_path();
255 if (!path) {
256 ret = -ENOMEM;
257 goto out;
258 }
259
260 /* default this to quota off, in case no status key is found */
261 fs_info->qgroup_flags = 0;
262
263 /*
264 * pass 1: read status, all qgroup infos and limits
265 */
266 key.objectid = 0;
267 key.type = 0;
268 key.offset = 0;
269 ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
270 if (ret)
271 goto out;
272
273 while (1) {
274 struct btrfs_qgroup *qgroup;
275
276 slot = path->slots[0];
277 l = path->nodes[0];
278 btrfs_item_key_to_cpu(l, &found_key, slot);
279
280 if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
281 struct btrfs_qgroup_status_item *ptr;
282
283 ptr = btrfs_item_ptr(l, slot,
284 struct btrfs_qgroup_status_item);
285
286 if (btrfs_qgroup_status_version(l, ptr) !=
287 BTRFS_QGROUP_STATUS_VERSION) {
288 printk(KERN_ERR
289 "btrfs: old qgroup version, quota disabled\n");
290 goto out;
291 }
292 if (btrfs_qgroup_status_generation(l, ptr) !=
293 fs_info->generation) {
294 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
295 printk(KERN_ERR
296 "btrfs: qgroup generation mismatch, "
297 "marked as inconsistent\n");
298 }
299 fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
300 ptr);
301 /* FIXME read scan element */
302 goto next1;
303 }
304
305 if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
306 found_key.type != BTRFS_QGROUP_LIMIT_KEY)
307 goto next1;
308
309 qgroup = find_qgroup_rb(fs_info, found_key.offset);
310 if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
311 (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
312 printk(KERN_ERR "btrfs: inconsistent qgroup config\n");
313 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
314 }
315 if (!qgroup) {
316 qgroup = add_qgroup_rb(fs_info, found_key.offset);
317 if (IS_ERR(qgroup)) {
318 ret = PTR_ERR(qgroup);
319 goto out;
320 }
321 }
322 switch (found_key.type) {
323 case BTRFS_QGROUP_INFO_KEY: {
324 struct btrfs_qgroup_info_item *ptr;
325
326 ptr = btrfs_item_ptr(l, slot,
327 struct btrfs_qgroup_info_item);
328 qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
329 qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
330 qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
331 qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
332 /* generation currently unused */
333 break;
334 }
335 case BTRFS_QGROUP_LIMIT_KEY: {
336 struct btrfs_qgroup_limit_item *ptr;
337
338 ptr = btrfs_item_ptr(l, slot,
339 struct btrfs_qgroup_limit_item);
340 qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
341 qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
342 qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
343 qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
344 qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
345 break;
346 }
347 }
348next1:
349 ret = btrfs_next_item(quota_root, path);
350 if (ret < 0)
351 goto out;
352 if (ret)
353 break;
354 }
355 btrfs_release_path(path);
356
357 /*
358 * pass 2: read all qgroup relations
359 */
360 key.objectid = 0;
361 key.type = BTRFS_QGROUP_RELATION_KEY;
362 key.offset = 0;
363 ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
364 if (ret)
365 goto out;
366 while (1) {
367 slot = path->slots[0];
368 l = path->nodes[0];
369 btrfs_item_key_to_cpu(l, &found_key, slot);
370
371 if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
372 goto next2;
373
374 if (found_key.objectid > found_key.offset) {
375 /* parent <- member, not needed to build config */
376 /* FIXME should we omit the key completely? */
377 goto next2;
378 }
379
380 ret = add_relation_rb(fs_info, found_key.objectid,
381 found_key.offset);
382 if (ret)
383 goto out;
384next2:
385 ret = btrfs_next_item(quota_root, path);
386 if (ret < 0)
387 goto out;
388 if (ret)
389 break;
390 }
391out:
392 fs_info->qgroup_flags |= flags;
393 if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
394 fs_info->quota_enabled = 0;
395 fs_info->pending_quota_state = 0;
396 }
397 btrfs_free_path(path);
398
399 return ret < 0 ? ret : 0;
400}
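Pass 2 above skips keys with objectid > offset because each relation is stored in the quota tree twice, once as (member, parent) and once mirrored, so filtering on the ordering adds every edge exactly once. A small sketch of that filter (the ids are hypothetical examples; level-1 group ids carry the level in the upper 16 bits of the qgroupid):

#include <stdio.h>

struct key { unsigned long long objectid, offset; };

int main(void)
{
	struct key items[] = {
		{ 256, (1ULL << 48) | 1 },	/* member -> parent: used */
		{ (1ULL << 48) | 1, 256 },	/* mirrored copy: skipped */
	};

	for (int i = 0; i < 2; i++) {
		if (items[i].objectid > items[i].offset) {
			printf("skip   (%llu, %llu)\n",
			       items[i].objectid, items[i].offset);
			continue;
		}
		printf("add relation %llu -> %llu\n",
		       items[i].objectid, items[i].offset);
	}
	return 0;
}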
401
402/*
403 * This is only called from close_ctree() or open_ctree(), both in single-
404 * treaded paths. Clean up the in-memory structures. No locking needed.
405 */
406void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
407{
408 struct rb_node *n;
409 struct btrfs_qgroup *qgroup;
410 struct btrfs_qgroup_list *list;
411
412 while ((n = rb_first(&fs_info->qgroup_tree))) {
413 qgroup = rb_entry(n, struct btrfs_qgroup, node);
414 rb_erase(n, &fs_info->qgroup_tree);
415
416 WARN_ON(!list_empty(&qgroup->dirty));
417
418 while (!list_empty(&qgroup->groups)) {
419 list = list_first_entry(&qgroup->groups,
420 struct btrfs_qgroup_list,
421 next_group);
422 list_del(&list->next_group);
423 list_del(&list->next_member);
424 kfree(list);
425 }
426
427 while (!list_empty(&qgroup->members)) {
428 list = list_first_entry(&qgroup->members,
429 struct btrfs_qgroup_list,
430 next_member);
431 list_del(&list->next_group);
432 list_del(&list->next_member);
433 kfree(list);
434 }
435 kfree(qgroup);
436 }
437}
438
439static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
440 struct btrfs_root *quota_root,
441 u64 src, u64 dst)
442{
443 int ret;
444 struct btrfs_path *path;
445 struct btrfs_key key;
446
447 path = btrfs_alloc_path();
448 if (!path)
449 return -ENOMEM;
450
451 key.objectid = src;
452 key.type = BTRFS_QGROUP_RELATION_KEY;
453 key.offset = dst;
454
455 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
456
457 btrfs_mark_buffer_dirty(path->nodes[0]);
458
459 btrfs_free_path(path);
460 return ret;
461}
462
463static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
464 struct btrfs_root *quota_root,
465 u64 src, u64 dst)
466{
467 int ret;
468 struct btrfs_path *path;
469 struct btrfs_key key;
470
471 path = btrfs_alloc_path();
472 if (!path)
473 return -ENOMEM;
474
475 key.objectid = src;
476 key.type = BTRFS_QGROUP_RELATION_KEY;
477 key.offset = dst;
478
479 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
480 if (ret < 0)
481 goto out;
482
483 if (ret > 0) {
484 ret = -ENOENT;
485 goto out;
486 }
487
488 ret = btrfs_del_item(trans, quota_root, path);
489out:
490 btrfs_free_path(path);
491 return ret;
492}
493
494static int add_qgroup_item(struct btrfs_trans_handle *trans,
495 struct btrfs_root *quota_root, u64 qgroupid)
496{
497 int ret;
498 struct btrfs_path *path;
499 struct btrfs_qgroup_info_item *qgroup_info;
500 struct btrfs_qgroup_limit_item *qgroup_limit;
501 struct extent_buffer *leaf;
502 struct btrfs_key key;
503
504 path = btrfs_alloc_path();
505 if (!path)
506 return -ENOMEM;
507
508 key.objectid = 0;
509 key.type = BTRFS_QGROUP_INFO_KEY;
510 key.offset = qgroupid;
511
512 ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
513 sizeof(*qgroup_info));
514 if (ret)
515 goto out;
516
517 leaf = path->nodes[0];
518 qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
519 struct btrfs_qgroup_info_item);
520 btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
521 btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
522 btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
523 btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
524 btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);
525
526 btrfs_mark_buffer_dirty(leaf);
527
528 btrfs_release_path(path);
529
530 key.type = BTRFS_QGROUP_LIMIT_KEY;
531 ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
532 sizeof(*qgroup_limit));
533 if (ret)
534 goto out;
535
536 leaf = path->nodes[0];
537 qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
538 struct btrfs_qgroup_limit_item);
539 btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
540 btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
541 btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
542 btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
543 btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);
544
545 btrfs_mark_buffer_dirty(leaf);
546
547 ret = 0;
548out:
549 btrfs_free_path(path);
550 return ret;
551}
552
553static int del_qgroup_item(struct btrfs_trans_handle *trans,
554 struct btrfs_root *quota_root, u64 qgroupid)
555{
556 int ret;
557 struct btrfs_path *path;
558 struct btrfs_key key;
559
560 path = btrfs_alloc_path();
561 if (!path)
562 return -ENOMEM;
563
564 key.objectid = 0;
565 key.type = BTRFS_QGROUP_INFO_KEY;
566 key.offset = qgroupid;
567 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
568 if (ret < 0)
569 goto out;
570
571 if (ret > 0) {
572 ret = -ENOENT;
573 goto out;
574 }
575
576 ret = btrfs_del_item(trans, quota_root, path);
577 if (ret)
578 goto out;
579
580 btrfs_release_path(path);
581
582 key.type = BTRFS_QGROUP_LIMIT_KEY;
583 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
584 if (ret < 0)
585 goto out;
586
587 if (ret > 0) {
588 ret = -ENOENT;
589 goto out;
590 }
591
592 ret = btrfs_del_item(trans, quota_root, path);
593
594out:
595 btrfs_free_path(path);
596 return ret;
597}
598
599static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
600 struct btrfs_root *root, u64 qgroupid,
601 u64 flags, u64 max_rfer, u64 max_excl,
602 u64 rsv_rfer, u64 rsv_excl)
603{
604 struct btrfs_path *path;
605 struct btrfs_key key;
606 struct extent_buffer *l;
607 struct btrfs_qgroup_limit_item *qgroup_limit;
608 int ret;
609 int slot;
610
611 key.objectid = 0;
612 key.type = BTRFS_QGROUP_LIMIT_KEY;
613 key.offset = qgroupid;
614
615 path = btrfs_alloc_path();
616 BUG_ON(!path);
617 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
618 if (ret > 0)
619 ret = -ENOENT;
620
621 if (ret)
622 goto out;
623
624 l = path->nodes[0];
625 slot = path->slots[0];
626 qgroup_limit = btrfs_item_ptr(l, path->slots[0],
627 struct btrfs_qgroup_limit_item);
628 btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags);
629 btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer);
630 btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl);
631 btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer);
632 btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl);
633
634 btrfs_mark_buffer_dirty(l);
635
636out:
637 btrfs_free_path(path);
638 return ret;
639}
640
641static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
642 struct btrfs_root *root,
643 struct btrfs_qgroup *qgroup)
644{
645 struct btrfs_path *path;
646 struct btrfs_key key;
647 struct extent_buffer *l;
648 struct btrfs_qgroup_info_item *qgroup_info;
649 int ret;
650 int slot;
651
652 key.objectid = 0;
653 key.type = BTRFS_QGROUP_INFO_KEY;
654 key.offset = qgroup->qgroupid;
655
656 path = btrfs_alloc_path();
657 BUG_ON(!path);
658 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
659 if (ret > 0)
660 ret = -ENOENT;
661
662 if (ret)
663 goto out;
664
665 l = path->nodes[0];
666 slot = path->slots[0];
667 qgroup_info = btrfs_item_ptr(l, path->slots[0],
668 struct btrfs_qgroup_info_item);
669 btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
670 btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
671 btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
672 btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
673 btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
674
675 btrfs_mark_buffer_dirty(l);
676
677out:
678 btrfs_free_path(path);
679 return ret;
680}
681
682static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
683 struct btrfs_fs_info *fs_info,
684 struct btrfs_root *root)
685{
686 struct btrfs_path *path;
687 struct btrfs_key key;
688 struct extent_buffer *l;
689 struct btrfs_qgroup_status_item *ptr;
690 int ret;
691 int slot;
692
693 key.objectid = 0;
694 key.type = BTRFS_QGROUP_STATUS_KEY;
695 key.offset = 0;
696
697 path = btrfs_alloc_path();
698	if (!path)
		return -ENOMEM;
699 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
700 if (ret > 0)
701 ret = -ENOENT;
702
703 if (ret)
704 goto out;
705
706 l = path->nodes[0];
707 slot = path->slots[0];
708 ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
709 btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
710 btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
711 /* XXX scan */
712
713 btrfs_mark_buffer_dirty(l);
714
715out:
716 btrfs_free_path(path);
717 return ret;
718}
719
720/*
721 * called with qgroup_lock held
722 */
723static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
724 struct btrfs_root *root)
725{
726 struct btrfs_path *path;
727 struct btrfs_key key;
728 int ret;
729
730 if (!root)
731 return -EINVAL;
732
733 path = btrfs_alloc_path();
734 if (!path)
735 return -ENOMEM;
736
737 while (1) {
738 key.objectid = 0;
739 key.offset = 0;
740 key.type = 0;
741
742 path->leave_spinning = 1;
743 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
744 if (ret > 0) {
745 if (path->slots[0] == 0)
746 break;
747 path->slots[0]--;
748		} else if (ret < 0) {
749			goto out;
750		}
751
752 ret = btrfs_del_item(trans, root, path);
753 if (ret)
754 goto out;
755 btrfs_release_path(path);
756 }
757 ret = 0;
758out:
759 root->fs_info->pending_quota_state = 0;
760 btrfs_free_path(path);
761 return ret;
762}
763
764int btrfs_quota_enable(struct btrfs_trans_handle *trans,
765 struct btrfs_fs_info *fs_info)
766{
767 struct btrfs_root *quota_root;
768 struct btrfs_path *path = NULL;
769 struct btrfs_qgroup_status_item *ptr;
770 struct extent_buffer *leaf;
771 struct btrfs_key key;
772 int ret = 0;
773
774 spin_lock(&fs_info->qgroup_lock);
775 if (fs_info->quota_root) {
776 fs_info->pending_quota_state = 1;
777 spin_unlock(&fs_info->qgroup_lock);
778 goto out;
779 }
780 spin_unlock(&fs_info->qgroup_lock);
781
782 /*
783 * initially create the quota tree
784 */
785 quota_root = btrfs_create_tree(trans, fs_info,
786 BTRFS_QUOTA_TREE_OBJECTID);
787 if (IS_ERR(quota_root)) {
788 ret = PTR_ERR(quota_root);
789 goto out;
790 }
791
792 path = btrfs_alloc_path();
793	if (!path) {
794		ret = -ENOMEM;
		goto out;
	}
795
796 key.objectid = 0;
797 key.type = BTRFS_QGROUP_STATUS_KEY;
798 key.offset = 0;
799
800 ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
801 sizeof(*ptr));
802 if (ret)
803 goto out;
804
805 leaf = path->nodes[0];
806 ptr = btrfs_item_ptr(leaf, path->slots[0],
807 struct btrfs_qgroup_status_item);
808 btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
809 btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
810 fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
811 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
812 btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
813 btrfs_set_qgroup_status_scan(leaf, ptr, 0);
814
815 btrfs_mark_buffer_dirty(leaf);
816
817 spin_lock(&fs_info->qgroup_lock);
818 fs_info->quota_root = quota_root;
819 fs_info->pending_quota_state = 1;
820 spin_unlock(&fs_info->qgroup_lock);
821out:
822 btrfs_free_path(path);
823 return ret;
824}
825
826int btrfs_quota_disable(struct btrfs_trans_handle *trans,
827 struct btrfs_fs_info *fs_info)
828{
829 struct btrfs_root *tree_root = fs_info->tree_root;
830 struct btrfs_root *quota_root;
831 int ret = 0;
832
833 spin_lock(&fs_info->qgroup_lock);
834 fs_info->quota_enabled = 0;
835 fs_info->pending_quota_state = 0;
836 quota_root = fs_info->quota_root;
837 fs_info->quota_root = NULL;
838 btrfs_free_qgroup_config(fs_info);
839 spin_unlock(&fs_info->qgroup_lock);
840
841 if (!quota_root)
842 return -EINVAL;
843
844 ret = btrfs_clean_quota_tree(trans, quota_root);
845 if (ret)
846 goto out;
847
848 ret = btrfs_del_root(trans, tree_root, &quota_root->root_key);
849 if (ret)
850 goto out;
851
852 list_del(&quota_root->dirty_list);
853
854 btrfs_tree_lock(quota_root->node);
855 clean_tree_block(trans, tree_root, quota_root->node);
856 btrfs_tree_unlock(quota_root->node);
857 btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
858
859 free_extent_buffer(quota_root->node);
860 free_extent_buffer(quota_root->commit_root);
861 kfree(quota_root);
862out:
863 return ret;
864}
865
866int btrfs_quota_rescan(struct btrfs_fs_info *fs_info)
867{
868 /* FIXME */
869 return 0;
870}
871
872int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
873 struct btrfs_fs_info *fs_info, u64 src, u64 dst)
874{
875 struct btrfs_root *quota_root;
876 int ret = 0;
877
878 quota_root = fs_info->quota_root;
879 if (!quota_root)
880 return -EINVAL;
881
882 ret = add_qgroup_relation_item(trans, quota_root, src, dst);
883 if (ret)
884 return ret;
885
886 ret = add_qgroup_relation_item(trans, quota_root, dst, src);
887 if (ret) {
888 del_qgroup_relation_item(trans, quota_root, src, dst);
889 return ret;
890 }
891
892 spin_lock(&fs_info->qgroup_lock);
893 ret = add_relation_rb(quota_root->fs_info, src, dst);
894 spin_unlock(&fs_info->qgroup_lock);
895
896 return ret;
897}
898
899int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
900 struct btrfs_fs_info *fs_info, u64 src, u64 dst)
901{
902 struct btrfs_root *quota_root;
903 int ret = 0;
904 int err;
905
906 quota_root = fs_info->quota_root;
907 if (!quota_root)
908 return -EINVAL;
909
910 ret = del_qgroup_relation_item(trans, quota_root, src, dst);
911 err = del_qgroup_relation_item(trans, quota_root, dst, src);
912 if (err && !ret)
913 ret = err;
914
915 spin_lock(&fs_info->qgroup_lock);
916 del_relation_rb(fs_info, src, dst);
917
918 spin_unlock(&fs_info->qgroup_lock);
919
920 return ret;
921}
922
923int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
924 struct btrfs_fs_info *fs_info, u64 qgroupid, char *name)
925{
926 struct btrfs_root *quota_root;
927 struct btrfs_qgroup *qgroup;
928 int ret = 0;
929
930 quota_root = fs_info->quota_root;
931 if (!quota_root)
932 return -EINVAL;
933
934	ret = add_qgroup_item(trans, quota_root, qgroupid);
	if (ret)
		return ret;
935
936 spin_lock(&fs_info->qgroup_lock);
937 qgroup = add_qgroup_rb(fs_info, qgroupid);
938 spin_unlock(&fs_info->qgroup_lock);
939
940 if (IS_ERR(qgroup))
941 ret = PTR_ERR(qgroup);
942
943 return ret;
944}
945
946int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
947 struct btrfs_fs_info *fs_info, u64 qgroupid)
948{
949 struct btrfs_root *quota_root;
950 int ret = 0;
951
952 quota_root = fs_info->quota_root;
953 if (!quota_root)
954 return -EINVAL;
955
956 ret = del_qgroup_item(trans, quota_root, qgroupid);
957
958 spin_lock(&fs_info->qgroup_lock);
959 del_qgroup_rb(quota_root->fs_info, qgroupid);
960
961 spin_unlock(&fs_info->qgroup_lock);
962
963 return ret;
964}
965
966int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
967 struct btrfs_fs_info *fs_info, u64 qgroupid,
968 struct btrfs_qgroup_limit *limit)
969{
970 struct btrfs_root *quota_root = fs_info->quota_root;
971 struct btrfs_qgroup *qgroup;
972 int ret = 0;
973
974 if (!quota_root)
975 return -EINVAL;
976
977 ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
978 limit->flags, limit->max_rfer,
979 limit->max_excl, limit->rsv_rfer,
980 limit->rsv_excl);
981 if (ret) {
982 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
983		printk(KERN_ERR "btrfs: unable to update quota limit for %llu\n",
984		       (unsigned long long)qgroupid);
985 }
986
987 spin_lock(&fs_info->qgroup_lock);
988
989 qgroup = find_qgroup_rb(fs_info, qgroupid);
990 if (!qgroup) {
991 ret = -ENOENT;
992 goto unlock;
993 }
994 qgroup->lim_flags = limit->flags;
995 qgroup->max_rfer = limit->max_rfer;
996 qgroup->max_excl = limit->max_excl;
997 qgroup->rsv_rfer = limit->rsv_rfer;
998 qgroup->rsv_excl = limit->rsv_excl;
999
1000unlock:
1001 spin_unlock(&fs_info->qgroup_lock);
1002
1003 return ret;
1004}
1005
1006static void qgroup_dirty(struct btrfs_fs_info *fs_info,
1007 struct btrfs_qgroup *qgroup)
1008{
1009 if (list_empty(&qgroup->dirty))
1010 list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
1011}
1012
1013/*
1014 * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
1015 * the modification into a list that's later used by btrfs_end_transaction to
1016 * pass the recorded modifications on to btrfs_qgroup_account_ref.
1017 */
1018int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
1019 struct btrfs_delayed_ref_node *node,
1020 struct btrfs_delayed_extent_op *extent_op)
1021{
1022 struct qgroup_update *u;
1023
1024 BUG_ON(!trans->delayed_ref_elem.seq);
1025 u = kmalloc(sizeof(*u), GFP_NOFS);
1026 if (!u)
1027 return -ENOMEM;
1028
1029 u->node = node;
1030 u->extent_op = extent_op;
1031 list_add_tail(&u->list, &trans->qgroup_ref_list);
1032
1033 return 0;
1034}
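
/*
 * Sketch of the consumer side (illustration only -- the real draining is
 * done by btrfs_delayed_refs_qgroup_accounting(), which this patch also
 * wires into __btrfs_end_transaction and commit): every recorded update is
 * handed to btrfs_qgroup_account_ref() and then freed.
 */
#if 0
static void qgroup_drain_recorded_refs(struct btrfs_trans_handle *trans,
				       struct btrfs_fs_info *fs_info)
{
	struct qgroup_update *u, *tmp;

	list_for_each_entry_safe(u, tmp, &trans->qgroup_ref_list, list) {
		btrfs_qgroup_account_ref(trans, fs_info, u->node,
					 u->extent_op);
		list_del(&u->list);
		kfree(u);
	}
}
#endif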
1035
1036/*
1037 * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
1038 * from the fs. First, all roots referencing the extent are searched, and
1039 * then the space is accounted accordingly to the different roots. The
1040 * accounting algorithm works in 3 steps documented inline.
1041 */
1042int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
1043 struct btrfs_fs_info *fs_info,
1044 struct btrfs_delayed_ref_node *node,
1045 struct btrfs_delayed_extent_op *extent_op)
1046{
1048 struct btrfs_root *quota_root;
1049 u64 ref_root;
1050 struct btrfs_qgroup *qgroup;
1051 struct ulist_node *unode;
1052 struct ulist *roots = NULL;
1053 struct ulist *tmp = NULL;
1054 struct ulist_iterator uiter;
1055 u64 seq;
1056 int ret = 0;
1057 int sgn;
1058
1059 if (!fs_info->quota_enabled)
1060 return 0;
1061
1062 BUG_ON(!fs_info->quota_root);
1063
1068 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
1069 node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
1070 struct btrfs_delayed_tree_ref *ref;
1071 ref = btrfs_delayed_node_to_tree_ref(node);
1072 ref_root = ref->root;
1073 } else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
1074 node->type == BTRFS_SHARED_DATA_REF_KEY) {
1075 struct btrfs_delayed_data_ref *ref;
1076 ref = btrfs_delayed_node_to_data_ref(node);
1077 ref_root = ref->root;
1078 } else {
1079 BUG();
1080 }
1081
1082 if (!is_fstree(ref_root)) {
1083 /*
1084 * non-fs-trees are not being accounted
1085 */
1086 return 0;
1087 }
1088
1089 switch (node->action) {
1090 case BTRFS_ADD_DELAYED_REF:
1091 case BTRFS_ADD_DELAYED_EXTENT:
1092 sgn = 1;
1093 break;
1094 case BTRFS_DROP_DELAYED_REF:
1095 sgn = -1;
1096 break;
1097 case BTRFS_UPDATE_DELAYED_HEAD:
1098 return 0;
1099 default:
1100 BUG();
1101 }
1102
1103 /*
1104 * the delayed ref sequence number we pass depends on the direction of
1105 * the operation. for add operations, we pass (node->seq - 1) to skip
1106 * the delayed ref's current sequence number, because we need the state
1107 * of the tree before the add operation. for delete operations, we pass
1108 * (node->seq) to include the delayed ref's current sequence number,
1109 * because we need the state of the tree after the delete operation.
1110 */
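	/*
	 * in numbers (illustration): for an add queued at seq 42, roots are
	 * looked up at seq 41 -- the tree state before the add; for a drop
	 * queued at seq 42, they are looked up at seq 42 -- the state after
	 * the drop.
	 */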
1111 ret = btrfs_find_all_roots(trans, fs_info, node->bytenr,
1112 sgn > 0 ? node->seq - 1 : node->seq, &roots);
1113 if (ret < 0)
1114 goto out;
1115
1116 spin_lock(&fs_info->qgroup_lock);
1117 quota_root = fs_info->quota_root;
1118 if (!quota_root)
1119 goto unlock;
1120
1121 qgroup = find_qgroup_rb(fs_info, ref_root);
1122 if (!qgroup)
1123 goto unlock;
1124
1125 /*
1126 * step 1: for each old ref, visit all nodes once and inc refcnt
1127 */
1128 tmp = ulist_alloc(GFP_ATOMIC);
1129 if (!tmp) {
1130 ret = -ENOMEM;
1131 goto unlock;
1132 }
1133 seq = fs_info->qgroup_seq;
1134 fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
1135
1136 ULIST_ITER_INIT(&uiter);
1137 while ((unode = ulist_next(roots, &uiter))) {
1138 struct ulist_node *tmp_unode;
1139 struct ulist_iterator tmp_uiter;
1140 struct btrfs_qgroup *qg;
1141
1142 qg = find_qgroup_rb(fs_info, unode->val);
1143 if (!qg)
1144 continue;
1145
1146 ulist_reinit(tmp);
1147 /* XXX id not needed */
1148 ulist_add(tmp, qg->qgroupid, (unsigned long)qg, GFP_ATOMIC);
1149 ULIST_ITER_INIT(&tmp_uiter);
1150 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1151 struct btrfs_qgroup_list *glist;
1152
1153 qg = (struct btrfs_qgroup *)tmp_unode->aux;
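			/*
			 * refcnts are reset lazily: a value below the current
			 * seq window is stale from an earlier accounting run,
			 * so the first visit in this run starts the count at
			 * seq + 1 ("seen once").
			 */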
1154 if (qg->refcnt < seq)
1155 qg->refcnt = seq + 1;
1156 else
1157 ++qg->refcnt;
1158
1159 list_for_each_entry(glist, &qg->groups, next_group) {
1160 ulist_add(tmp, glist->group->qgroupid,
1161 (unsigned long)glist->group,
1162 GFP_ATOMIC);
1163 }
1164 }
1165 }
1166
1167 /*
1168 * step 2: walk from the new root
1169 */
1170 ulist_reinit(tmp);
1171 ulist_add(tmp, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC);
1172 ULIST_ITER_INIT(&uiter);
1173 while ((unode = ulist_next(tmp, &uiter))) {
1174 struct btrfs_qgroup *qg;
1175 struct btrfs_qgroup_list *glist;
1176
1177 qg = (struct btrfs_qgroup *)unode->aux;
1178 if (qg->refcnt < seq) {
1179 /* not visited by step 1 */
1180 qg->rfer += sgn * node->num_bytes;
1181 qg->rfer_cmpr += sgn * node->num_bytes;
1182 if (roots->nnodes == 0) {
1183 qg->excl += sgn * node->num_bytes;
1184 qg->excl_cmpr += sgn * node->num_bytes;
1185 }
1186 qgroup_dirty(fs_info, qg);
1187 }
1188 WARN_ON(qg->tag >= seq);
1189 qg->tag = seq;
1190
1191 list_for_each_entry(glist, &qg->groups, next_group) {
1192 ulist_add(tmp, glist->group->qgroupid,
1193 (unsigned long)glist->group, GFP_ATOMIC);
1194 }
1195 }
1196
1197 /*
1198 * step 3: walk again from old refs
1199 */
1200 ULIST_ITER_INIT(&uiter);
1201 while ((unode = ulist_next(roots, &uiter))) {
1202 struct btrfs_qgroup *qg;
1203 struct ulist_node *tmp_unode;
1204 struct ulist_iterator tmp_uiter;
1205
1206 qg = find_qgroup_rb(fs_info, unode->val);
1207 if (!qg)
1208 continue;
1209
1210 ulist_reinit(tmp);
1211 ulist_add(tmp, qg->qgroupid, (unsigned long)qg, GFP_ATOMIC);
1212 ULIST_ITER_INIT(&tmp_uiter);
1213 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1214 struct btrfs_qgroup_list *glist;
1215
1216 qg = (struct btrfs_qgroup *)tmp_unode->aux;
1217 if (qg->tag == seq)
1218 continue;
1219
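			/*
			 * refcnt - seq is the number of old roots that reach
			 * this qgroup (counted in step 1). If all of them do,
			 * every ref to the extent lives inside this group, so
			 * its exclusive counters must follow the change too.
			 */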
1220 if (qg->refcnt - seq == roots->nnodes) {
1221 qg->excl -= sgn * node->num_bytes;
1222 qg->excl_cmpr -= sgn * node->num_bytes;
1223 qgroup_dirty(fs_info, qg);
1224 }
1225
1226 list_for_each_entry(glist, &qg->groups, next_group) {
1227 ulist_add(tmp, glist->group->qgroupid,
1228 (unsigned long)glist->group,
1229 GFP_ATOMIC);
1230 }
1231 }
1232 }
1233 ret = 0;
1234unlock:
1235 spin_unlock(&fs_info->qgroup_lock);
1236out:
1237 ulist_free(roots);
1238 ulist_free(tmp);
1239
1240 return ret;
1241}
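
/*
 * worked example (illustration, not part of the patch): extent X of size s
 * is shared by subvolumes A and B, and B drops its ref (sgn = -1). The root
 * lookup returns {A}. Step 1 bumps the refcnt of A's qgroup. Step 2 walks
 * from B's qgroup, which step 1 did not visit, so B->rfer -= s; excl is
 * untouched, the extent was shared. Step 3 revisits A: refcnt - seq equals
 * nnodes (1), so A->excl -= sgn * s, i.e. A->excl += s -- A is now the sole
 * owner of X.
 */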
1242
1243/*
1244 * called from commit_transaction. Writes all changed qgroups to disk.
1245 */
1246int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
1247 struct btrfs_fs_info *fs_info)
1248{
1249 struct btrfs_root *quota_root = fs_info->quota_root;
1250 int ret = 0;
1251
1252 if (!quota_root)
1253 goto out;
1254
1255 fs_info->quota_enabled = fs_info->pending_quota_state;
1256
1257 spin_lock(&fs_info->qgroup_lock);
1258 while (!list_empty(&fs_info->dirty_qgroups)) {
1259 struct btrfs_qgroup *qgroup;
1260 qgroup = list_first_entry(&fs_info->dirty_qgroups,
1261 struct btrfs_qgroup, dirty);
1262 list_del_init(&qgroup->dirty);
1263 spin_unlock(&fs_info->qgroup_lock);
1264 ret = update_qgroup_info_item(trans, quota_root, qgroup);
1265 if (ret)
1266 fs_info->qgroup_flags |=
1267 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1268 spin_lock(&fs_info->qgroup_lock);
1269 }
1270 if (fs_info->quota_enabled)
1271 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
1272 else
1273 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
1274 spin_unlock(&fs_info->qgroup_lock);
1275
1276 ret = update_qgroup_status_item(trans, fs_info, quota_root);
1277 if (ret)
1278 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1279
1280out:
1281
1282 return ret;
1283}
1284
1285/*
1286 * copy the accounting information between qgroups. This is necessary when a
1287 * snapshot or a subvolume is created
1288 */
1289int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
1290 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
1291 struct btrfs_qgroup_inherit *inherit)
1292{
1293 int ret = 0;
1294 int i;
1295 u64 *i_qgroups;
1296 struct btrfs_root *quota_root = fs_info->quota_root;
1297 struct btrfs_qgroup *srcgroup;
1298 struct btrfs_qgroup *dstgroup;
1299 u32 level_size = 0;
1300
1301 if (!fs_info->quota_enabled)
1302 return 0;
1303
1304 if (!quota_root)
1305 return -EINVAL;
1306
1307 /*
1308 * create a tracking group for the subvol itself
1309 */
1310 ret = add_qgroup_item(trans, quota_root, objectid);
1311 if (ret)
1312 goto out;
1313
1314 if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
1315 ret = update_qgroup_limit_item(trans, quota_root, objectid,
1316 inherit->lim.flags,
1317 inherit->lim.max_rfer,
1318 inherit->lim.max_excl,
1319 inherit->lim.rsv_rfer,
1320 inherit->lim.rsv_excl);
1321 if (ret)
1322 goto out;
1323 }
1324
1325 if (srcid) {
1326 struct btrfs_root *srcroot;
1327 struct btrfs_key srckey;
1328 int srcroot_level;
1329
1330 srckey.objectid = srcid;
1331 srckey.type = BTRFS_ROOT_ITEM_KEY;
1332 srckey.offset = (u64)-1;
1333 srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
1334 if (IS_ERR(srcroot)) {
1335 ret = PTR_ERR(srcroot);
1336 goto out;
1337 }
1338
1339 rcu_read_lock();
1340 srcroot_level = btrfs_header_level(srcroot->node);
1341 level_size = btrfs_level_size(srcroot, srcroot_level);
1342 rcu_read_unlock();
1343 }
1344
1345 /*
1346 * add qgroup to all inherited groups
1347 */
1348 if (inherit) {
1349 i_qgroups = (u64 *)(inherit + 1);
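		/*
		 * the ids follow the btrfs_qgroup_inherit header in memory:
		 * num_qgroups single ids first, then num_ref_copies and
		 * num_excl_copies (src, dst) pairs, in the layout the ioctl
		 * caller passed in.
		 */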
1350 for (i = 0; i < inherit->num_qgroups; ++i) {
1351 ret = add_qgroup_relation_item(trans, quota_root,
1352 objectid, *i_qgroups);
1353 if (ret)
1354 goto out;
1355 ret = add_qgroup_relation_item(trans, quota_root,
1356 *i_qgroups, objectid);
1357 if (ret)
1358 goto out;
1359 ++i_qgroups;
1360 }
1361 }
1362
1363
1364 spin_lock(&fs_info->qgroup_lock);
1365
1366 dstgroup = add_qgroup_rb(fs_info, objectid);
1367 if (!dstgroup)
1368 goto unlock;
1369
1370 if (srcid) {
1371 srcgroup = find_qgroup_rb(fs_info, srcid);
1372 if (!srcgroup)
1373 goto unlock;
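		/*
		 * the snapshot shares every tree block of the source except
		 * the root node, which is CoWed at creation: subtract one
		 * level_size from the shared counters and leave the source
		 * with exactly its root block as exclusive space.
		 */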
1374 dstgroup->rfer = srcgroup->rfer - level_size;
1375 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size;
1376 srcgroup->excl = level_size;
1377 srcgroup->excl_cmpr = level_size;
1378 qgroup_dirty(fs_info, dstgroup);
1379 qgroup_dirty(fs_info, srcgroup);
1380 }
1381
1382 if (!inherit)
1383 goto unlock;
1384
1385 i_qgroups = (u64 *)(inherit + 1);
1386 for (i = 0; i < inherit->num_qgroups; ++i) {
1387 ret = add_relation_rb(quota_root->fs_info, objectid,
1388 *i_qgroups);
1389 if (ret)
1390 goto unlock;
1391 ++i_qgroups;
1392 }
1393
1394 for (i = 0; i < inherit->num_ref_copies; ++i) {
1395 struct btrfs_qgroup *src;
1396 struct btrfs_qgroup *dst;
1397
1398 src = find_qgroup_rb(fs_info, i_qgroups[0]);
1399 dst = find_qgroup_rb(fs_info, i_qgroups[1]);
1400
1401 if (!src || !dst) {
1402 ret = -EINVAL;
1403 goto unlock;
1404 }
1405
1406 dst->rfer = src->rfer - level_size;
1407 dst->rfer_cmpr = src->rfer_cmpr - level_size;
1408 i_qgroups += 2;
1409 }
1410 for (i = 0; i < inherit->num_excl_copies; ++i) {
1411 struct btrfs_qgroup *src;
1412 struct btrfs_qgroup *dst;
1413
1414 src = find_qgroup_rb(fs_info, i_qgroups[0]);
1415 dst = find_qgroup_rb(fs_info, i_qgroups[1]);
1416
1417 if (!src || !dst) {
1418 ret = -EINVAL;
1419 goto unlock;
1420 }
1421
1422 dst->excl = src->excl + level_size;
1423 dst->excl_cmpr = src->excl_cmpr + level_size;
1424 i_qgroups += 2;
1425 }
1426
1427unlock:
1428 spin_unlock(&fs_info->qgroup_lock);
1429out:
1430 return ret;
1431}
1432
1433/*
1434 * reserve some space for a qgroup and all its parents. The reservation takes
1435 * place with start_transaction or dealloc_reserve, similar to ENOSPC
1436 * accounting. If not enough space is available, EDQUOT is returned.
1437 * We assume that the requested space is new for all qgroups.
1438 */
1439int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
1440{
1441 struct btrfs_root *quota_root;
1442 struct btrfs_qgroup *qgroup;
1443 struct btrfs_fs_info *fs_info = root->fs_info;
1444 u64 ref_root = root->root_key.objectid;
1445 int ret = 0;
1446 struct ulist *ulist = NULL;
1447 struct ulist_node *unode;
1448 struct ulist_iterator uiter;
1449
1450 if (!is_fstree(ref_root))
1451 return 0;
1452
1453 if (num_bytes == 0)
1454 return 0;
1455
1456 spin_lock(&fs_info->qgroup_lock);
1457 quota_root = fs_info->quota_root;
1458 if (!quota_root)
1459 goto out;
1460
1461 qgroup = find_qgroup_rb(fs_info, ref_root);
1462 if (!qgroup)
1463 goto out;
1464
1465 /*
1466 * in a first step, we check all affected qgroups if any limits would
1467 * be exceeded
1468 */
1469	ulist = ulist_alloc(GFP_ATOMIC);
	if (!ulist) {
		ret = -ENOMEM;
		goto out;
	}
1470 ulist_add(ulist, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC);
1471 ULIST_ITER_INIT(&uiter);
1472 while ((unode = ulist_next(ulist, &uiter))) {
1473 struct btrfs_qgroup *qg;
1474 struct btrfs_qgroup_list *glist;
1475
1476 qg = (struct btrfs_qgroup *)unode->aux;
1477
1478 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
1479 qg->reserved + qg->rfer + num_bytes >
1480 qg->max_rfer)
1481 ret = -EDQUOT;
1482
1483 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
1484 qg->reserved + qg->excl + num_bytes >
1485 qg->max_excl)
1486 ret = -EDQUOT;
1487
1488 list_for_each_entry(glist, &qg->groups, next_group) {
1489 ulist_add(ulist, glist->group->qgroupid,
1490 (unsigned long)glist->group, GFP_ATOMIC);
1491 }
1492 }
1493 if (ret)
1494 goto out;
1495
1496 /*
1497 * no limits exceeded, now record the reservation into all qgroups
1498 */
1499 ULIST_ITER_INIT(&uiter);
1500 while ((unode = ulist_next(ulist, &uiter))) {
1501 struct btrfs_qgroup *qg;
1502
1503 qg = (struct btrfs_qgroup *)unode->aux;
1504
1505 qg->reserved += num_bytes;
1506 }
1507
1508out:
1509 spin_unlock(&fs_info->qgroup_lock);
1510 ulist_free(ulist);
1511
1512 return ret;
1513}
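
/*
 * usage sketch (illustration): the pairing this patch uses in
 * start_transaction()/__btrfs_end_transaction() -- reserve before the
 * handle is set up, free whatever is still accounted to the handle when it
 * ends. Variable names are taken from the transaction.c hunks below.
 */
#if 0
	if (root->fs_info->quota_enabled &&
	    is_fstree(root->root_key.objectid)) {
		u64 qgroup_reserved = num_items * root->leafsize;

		ret = btrfs_qgroup_reserve(root, qgroup_reserved);
		if (ret)
			return ERR_PTR(ret);
		h->qgroup_reserved = qgroup_reserved;
	}

	/* ...and on __btrfs_end_transaction: */
	if (trans->qgroup_reserved) {
		btrfs_qgroup_free(root, trans->qgroup_reserved);
		trans->qgroup_reserved = 0;
	}
#endif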
1514
1515void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
1516{
1517 struct btrfs_root *quota_root;
1518 struct btrfs_qgroup *qgroup;
1519 struct btrfs_fs_info *fs_info = root->fs_info;
1520 struct ulist *ulist = NULL;
1521 struct ulist_node *unode;
1522 struct ulist_iterator uiter;
1523 u64 ref_root = root->root_key.objectid;
1524
1525 if (!is_fstree(ref_root))
1526 return;
1527
1528 if (num_bytes == 0)
1529 return;
1530
1531 spin_lock(&fs_info->qgroup_lock);
1532
1533 quota_root = fs_info->quota_root;
1534 if (!quota_root)
1535 goto out;
1536
1537 qgroup = find_qgroup_rb(fs_info, ref_root);
1538 if (!qgroup)
1539 goto out;
1540
1541	ulist = ulist_alloc(GFP_ATOMIC);
	if (!ulist)
		goto out;
1542 ulist_add(ulist, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC);
1543 ULIST_ITER_INIT(&uiter);
1544 while ((unode = ulist_next(ulist, &uiter))) {
1545 struct btrfs_qgroup *qg;
1546 struct btrfs_qgroup_list *glist;
1547
1548 qg = (struct btrfs_qgroup *)unode->aux;
1549
1550 qg->reserved -= num_bytes;
1551
1552 list_for_each_entry(glist, &qg->groups, next_group) {
1553 ulist_add(ulist, glist->group->qgroupid,
1554 (unsigned long)glist->group, GFP_ATOMIC);
1555 }
1556 }
1557
1558out:
1559 spin_unlock(&fs_info->qgroup_lock);
1560 ulist_free(ulist);
1561}
1562
1563void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
1564{
1565 if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
1566 return;
1567 printk(KERN_ERR "btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %llu\n",
1568 trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
1569 trans->delayed_ref_elem.seq);
1570 BUG();
1571}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 328b95f6766..cc20e95ea28 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -38,7 +38,6 @@ void put_transaction(struct btrfs_transaction *transaction)
38 if (atomic_dec_and_test(&transaction->use_count)) { 38 if (atomic_dec_and_test(&transaction->use_count)) {
39 BUG_ON(!list_empty(&transaction->list)); 39 BUG_ON(!list_empty(&transaction->list));
40 WARN_ON(transaction->delayed_refs.root.rb_node); 40 WARN_ON(transaction->delayed_refs.root.rb_node);
41 WARN_ON(!list_empty(&transaction->delayed_refs.seq_head));
42 memset(transaction, 0, sizeof(*transaction)); 41 memset(transaction, 0, sizeof(*transaction));
43 kmem_cache_free(btrfs_transaction_cachep, transaction); 42 kmem_cache_free(btrfs_transaction_cachep, transaction);
44 } 43 }
@@ -126,7 +125,6 @@ loop:
126 cur_trans->delayed_refs.num_heads = 0; 125 cur_trans->delayed_refs.num_heads = 0;
127 cur_trans->delayed_refs.flushing = 0; 126 cur_trans->delayed_refs.flushing = 0;
128 cur_trans->delayed_refs.run_delayed_start = 0; 127 cur_trans->delayed_refs.run_delayed_start = 0;
129 cur_trans->delayed_refs.seq = 1;
130 128
131 /* 129 /*
132 * although the tree mod log is per file system and not per transaction, 130 * although the tree mod log is per file system and not per transaction,
@@ -145,10 +143,8 @@ loop:
145 } 143 }
146 atomic_set(&fs_info->tree_mod_seq, 0); 144 atomic_set(&fs_info->tree_mod_seq, 0);
147 145
148 init_waitqueue_head(&cur_trans->delayed_refs.seq_wait);
149 spin_lock_init(&cur_trans->commit_lock); 146 spin_lock_init(&cur_trans->commit_lock);
150 spin_lock_init(&cur_trans->delayed_refs.lock); 147 spin_lock_init(&cur_trans->delayed_refs.lock);
151 INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head);
152 148
153 INIT_LIST_HEAD(&cur_trans->pending_snapshots); 149 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
154 list_add_tail(&cur_trans->list, &fs_info->trans_list); 150 list_add_tail(&cur_trans->list, &fs_info->trans_list);
@@ -299,6 +295,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
299 struct btrfs_transaction *cur_trans; 295 struct btrfs_transaction *cur_trans;
300 u64 num_bytes = 0; 296 u64 num_bytes = 0;
301 int ret; 297 int ret;
298 u64 qgroup_reserved = 0;
302 299
303 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) 300 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
304 return ERR_PTR(-EROFS); 301 return ERR_PTR(-EROFS);
@@ -317,6 +314,14 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
317 * the appropriate flushing if need be. 314 * the appropriate flushing if need be.
318 */ 315 */
319 if (num_items > 0 && root != root->fs_info->chunk_root) { 316 if (num_items > 0 && root != root->fs_info->chunk_root) {
317 if (root->fs_info->quota_enabled &&
318 is_fstree(root->root_key.objectid)) {
319 qgroup_reserved = num_items * root->leafsize;
320 ret = btrfs_qgroup_reserve(root, qgroup_reserved);
321 if (ret)
322 return ERR_PTR(ret);
323 }
324
320 num_bytes = btrfs_calc_trans_metadata_size(root, num_items); 325 num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
321 ret = btrfs_block_rsv_add(root, 326 ret = btrfs_block_rsv_add(root,
322 &root->fs_info->trans_block_rsv, 327 &root->fs_info->trans_block_rsv,
@@ -349,12 +354,16 @@ again:
349 h->transaction = cur_trans; 354 h->transaction = cur_trans;
350 h->blocks_used = 0; 355 h->blocks_used = 0;
351 h->bytes_reserved = 0; 356 h->bytes_reserved = 0;
357 h->root = root;
352 h->delayed_ref_updates = 0; 358 h->delayed_ref_updates = 0;
353 h->use_count = 1; 359 h->use_count = 1;
354 h->adding_csums = 0; 360 h->adding_csums = 0;
355 h->block_rsv = NULL; 361 h->block_rsv = NULL;
356 h->orig_rsv = NULL; 362 h->orig_rsv = NULL;
357 h->aborted = 0; 363 h->aborted = 0;
364 h->qgroup_reserved = qgroup_reserved;
365 h->delayed_ref_elem.seq = 0;
366 INIT_LIST_HEAD(&h->qgroup_ref_list);
358 367
359 smp_mb(); 368 smp_mb();
360 if (cur_trans->blocked && may_wait_transaction(root, type)) { 369 if (cur_trans->blocked && may_wait_transaction(root, type)) {
@@ -505,6 +514,24 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
505 return 0; 514 return 0;
506 } 515 }
507 516
517 /*
518 * do the qgroup accounting as early as possible
519 */
520 err = btrfs_delayed_refs_qgroup_accounting(trans, info);
521
522 btrfs_trans_release_metadata(trans, root);
523 trans->block_rsv = NULL;
524 /*
525 * the same root has to be passed to start_transaction and
526 * end_transaction. Subvolume quota depends on this.
527 */
528 WARN_ON(trans->root != root);
529
530 if (trans->qgroup_reserved) {
531 btrfs_qgroup_free(root, trans->qgroup_reserved);
532 trans->qgroup_reserved = 0;
533 }
534
508 while (count < 2) { 535 while (count < 2) {
509 unsigned long cur = trans->delayed_ref_updates; 536 unsigned long cur = trans->delayed_ref_updates;
510 trans->delayed_ref_updates = 0; 537 trans->delayed_ref_updates = 0;
@@ -559,6 +586,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
559 root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { 586 root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
560 err = -EIO; 587 err = -EIO;
561 } 588 }
589 assert_qgroups_uptodate(trans);
562 590
563 memset(trans, 0, sizeof(*trans)); 591 memset(trans, 0, sizeof(*trans));
564 kmem_cache_free(btrfs_trans_handle_cachep, trans); 592 kmem_cache_free(btrfs_trans_handle_cachep, trans);
@@ -777,6 +805,13 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
777 ret = btrfs_run_dev_stats(trans, root->fs_info); 805 ret = btrfs_run_dev_stats(trans, root->fs_info);
778 BUG_ON(ret); 806 BUG_ON(ret);
779 807
808 ret = btrfs_run_qgroups(trans, root->fs_info);
809 BUG_ON(ret);
810
811 /* run_qgroups might have added some more refs */
812 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
813 BUG_ON(ret);
814
780 while (!list_empty(&fs_info->dirty_cowonly_roots)) { 815 while (!list_empty(&fs_info->dirty_cowonly_roots)) {
781 next = fs_info->dirty_cowonly_roots.next; 816 next = fs_info->dirty_cowonly_roots.next;
782 list_del_init(next); 817 list_del_init(next);
@@ -949,6 +984,14 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
949 } 984 }
950 } 985 }
951 986
987 ret = btrfs_qgroup_inherit(trans, fs_info, root->root_key.objectid,
988 objectid, pending->inherit);
989 kfree(pending->inherit);
990 if (ret) {
991 pending->error = ret;
992 goto fail;
993 }
994
952 key.objectid = objectid; 995 key.objectid = objectid;
953 key.offset = (u64)-1; 996 key.offset = (u64)-1;
954 key.type = BTRFS_ROOT_ITEM_KEY; 997 key.type = BTRFS_ROOT_ITEM_KEY;
@@ -1345,6 +1388,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1345 goto cleanup_transaction; 1388 goto cleanup_transaction;
1346 1389
1347 /* 1390 /*
1391 * running the delayed items may have added new refs. account
1392 * them now so that they hinder processing of more delayed refs
1393 * as little as possible.
1394 */
1395 btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
1396
1397 /*
1348 * rename don't use btrfs_join_transaction, so, once we 1398 * rename don't use btrfs_join_transaction, so, once we
1349 * set the transaction to blocked above, we aren't going 1399 * set the transaction to blocked above, we aren't going
1350 * to get any new ordered operations. We can safely run 1400 * to get any new ordered operations. We can safely run
@@ -1456,6 +1506,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1456 root->fs_info->chunk_root->node); 1506 root->fs_info->chunk_root->node);
1457 switch_commit_root(root->fs_info->chunk_root); 1507 switch_commit_root(root->fs_info->chunk_root);
1458 1508
1509 assert_qgroups_uptodate(trans);
1459 update_super_roots(root); 1510 update_super_roots(root);
1460 1511
1461 if (!root->fs_info->log_root_recovering) { 1512 if (!root->fs_info->log_root_recovering) {
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index d314a74b496..e8b8416c688 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -20,6 +20,7 @@
20#define __BTRFS_TRANSACTION__ 20#define __BTRFS_TRANSACTION__
21#include "btrfs_inode.h" 21#include "btrfs_inode.h"
22#include "delayed-ref.h" 22#include "delayed-ref.h"
23#include "ctree.h"
23 24
24struct btrfs_transaction { 25struct btrfs_transaction {
25 u64 transid; 26 u64 transid;
@@ -49,6 +50,7 @@ struct btrfs_transaction {
49struct btrfs_trans_handle { 50struct btrfs_trans_handle {
50 u64 transid; 51 u64 transid;
51 u64 bytes_reserved; 52 u64 bytes_reserved;
53 u64 qgroup_reserved;
52 unsigned long use_count; 54 unsigned long use_count;
53 unsigned long blocks_reserved; 55 unsigned long blocks_reserved;
54 unsigned long blocks_used; 56 unsigned long blocks_used;
@@ -58,12 +60,21 @@ struct btrfs_trans_handle {
58 struct btrfs_block_rsv *orig_rsv; 60 struct btrfs_block_rsv *orig_rsv;
59 int aborted; 61 int aborted;
60 int adding_csums; 62 int adding_csums;
63 /*
64 * this root is only needed to validate that the root passed to
65 * start_transaction is the same as the one passed to end_transaction.
66 * Subvolume quota depends on this
67 */
68 struct btrfs_root *root;
69 struct seq_list delayed_ref_elem;
70 struct list_head qgroup_ref_list;
61}; 71};
62 72
63struct btrfs_pending_snapshot { 73struct btrfs_pending_snapshot {
64 struct dentry *dentry; 74 struct dentry *dentry;
65 struct btrfs_root *root; 75 struct btrfs_root *root;
66 struct btrfs_root *snap; 76 struct btrfs_root *snap;
77 struct btrfs_qgroup_inherit *inherit;
67 /* block reservation for the operation */ 78 /* block reservation for the operation */
68 struct btrfs_block_rsv block_rsv; 79 struct btrfs_block_rsv block_rsv;
69 /* extra metadata reservation for relocation */ 80