diff options
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/btrfs/ctree.c | 408 | ||||
| -rw-r--r-- | fs/btrfs/ctree.h | 5 |
2 files changed, 412 insertions, 1 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 56485b3b7c31..72b9f97e2fdc 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | 18 | ||
| 19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
| 20 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
| 21 | #include <linux/rbtree.h> | ||
| 21 | #include "ctree.h" | 22 | #include "ctree.h" |
| 22 | #include "disk-io.h" | 23 | #include "disk-io.h" |
| 23 | #include "transaction.h" | 24 | #include "transaction.h" |
| @@ -288,6 +289,412 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
| 288 | return 0; | 289 | return 0; |
| 289 | } | 290 | } |
| 290 | 291 | ||
| 292 | enum mod_log_op { | ||
| 293 | MOD_LOG_KEY_REPLACE, | ||
| 294 | MOD_LOG_KEY_ADD, | ||
| 295 | MOD_LOG_KEY_REMOVE, | ||
| 296 | MOD_LOG_KEY_REMOVE_WHILE_FREEING, | ||
| 297 | MOD_LOG_KEY_REMOVE_WHILE_MOVING, | ||
| 298 | MOD_LOG_MOVE_KEYS, | ||
| 299 | MOD_LOG_ROOT_REPLACE, | ||
| 300 | }; | ||
| 301 | |||
| 302 | struct tree_mod_move { | ||
| 303 | int dst_slot; | ||
| 304 | int nr_items; | ||
| 305 | }; | ||
| 306 | |||
| 307 | struct tree_mod_root { | ||
| 308 | u64 logical; | ||
| 309 | u8 level; | ||
| 310 | }; | ||
| 311 | |||
| 312 | struct tree_mod_elem { | ||
| 313 | struct rb_node node; | ||
| 314 | u64 index; /* shifted logical */ | ||
| 315 | struct seq_list elem; | ||
| 316 | enum mod_log_op op; | ||
| 317 | |||
| 318 | /* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */ | ||
| 319 | int slot; | ||
| 320 | |||
| 321 | /* this is used for MOD_LOG_KEY* and MOD_LOG_ROOT_REPLACE */ | ||
| 322 | u64 generation; | ||
| 323 | |||
| 324 | /* those are used for op == MOD_LOG_KEY_{REPLACE,REMOVE} */ | ||
| 325 | struct btrfs_disk_key key; | ||
| 326 | u64 blockptr; | ||
| 327 | |||
| 328 | /* this is used for op == MOD_LOG_MOVE_KEYS */ | ||
| 329 | struct tree_mod_move move; | ||
| 330 | |||
| 331 | /* this is used for op == MOD_LOG_ROOT_REPLACE */ | ||
| 332 | struct tree_mod_root old_root; | ||
| 333 | }; | ||
| 334 | |||
| 335 | static inline void | ||
| 336 | __get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem) | ||
| 337 | { | ||
| 338 | elem->seq = atomic_inc_return(&fs_info->tree_mod_seq); | ||
| 339 | list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); | ||
| 340 | } | ||
| 341 | |||
| 342 | void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
| 343 | struct seq_list *elem) | ||
| 344 | { | ||
| 345 | elem->flags = 1; | ||
| 346 | spin_lock(&fs_info->tree_mod_seq_lock); | ||
| 347 | __get_tree_mod_seq(fs_info, elem); | ||
| 348 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
| 349 | } | ||
| 350 | |||
| 351 | void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
| 352 | struct seq_list *elem) | ||
| 353 | { | ||
| 354 | struct rb_root *tm_root; | ||
| 355 | struct rb_node *node; | ||
| 356 | struct rb_node *next; | ||
| 357 | struct seq_list *cur_elem; | ||
| 358 | struct tree_mod_elem *tm; | ||
| 359 | u64 min_seq = (u64)-1; | ||
| 360 | u64 seq_putting = elem->seq; | ||
| 361 | |||
| 362 | if (!seq_putting) | ||
| 363 | return; | ||
| 364 | |||
| 365 | BUG_ON(!(elem->flags & 1)); | ||
| 366 | spin_lock(&fs_info->tree_mod_seq_lock); | ||
| 367 | list_del(&elem->list); | ||
| 368 | |||
| 369 | list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) { | ||
| 370 | if ((cur_elem->flags & 1) && cur_elem->seq < min_seq) { | ||
| 371 | if (seq_putting > cur_elem->seq) { | ||
| 372 | /* | ||
| 373 | * blocker with lower sequence number exists, we | ||
| 374 | * cannot remove anything from the log | ||
| 375 | */ | ||
| 376 | goto out; | ||
| 377 | } | ||
| 378 | min_seq = cur_elem->seq; | ||
| 379 | } | ||
| 380 | } | ||
| 381 | |||
| 382 | /* | ||
| 383 | * anything that's lower than the lowest existing (read: blocked) | ||
| 384 | * sequence number can be removed from the tree. | ||
| 385 | */ | ||
| 386 | write_lock(&fs_info->tree_mod_log_lock); | ||
| 387 | tm_root = &fs_info->tree_mod_log; | ||
| 388 | for (node = rb_first(tm_root); node; node = next) { | ||
| 389 | next = rb_next(node); | ||
| 390 | tm = container_of(node, struct tree_mod_elem, node); | ||
| 391 | if (tm->elem.seq > min_seq) | ||
| 392 | continue; | ||
| 393 | rb_erase(node, tm_root); | ||
| 394 | list_del(&tm->elem.list); | ||
| 395 | kfree(tm); | ||
| 396 | } | ||
| 397 | write_unlock(&fs_info->tree_mod_log_lock); | ||
| 398 | out: | ||
| 399 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
| 400 | } | ||
| 401 | |||
| 402 | /* | ||
| 403 | * key order of the log: | ||
| 404 | * index -> sequence | ||
| 405 | * | ||
| 406 | * the index is the shifted logical of the *new* root node for root replace | ||
| 407 | * operations, or the shifted logical of the affected block for all other | ||
| 408 | * operations. | ||
| 409 | */ | ||
| 410 | static noinline int | ||
| 411 | __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) | ||
| 412 | { | ||
| 413 | struct rb_root *tm_root; | ||
| 414 | struct rb_node **new; | ||
| 415 | struct rb_node *parent = NULL; | ||
| 416 | struct tree_mod_elem *cur; | ||
| 417 | int ret = 0; | ||
| 418 | |||
| 419 | BUG_ON(!tm || !tm->elem.seq); | ||
| 420 | |||
| 421 | write_lock(&fs_info->tree_mod_log_lock); | ||
| 422 | tm_root = &fs_info->tree_mod_log; | ||
| 423 | new = &tm_root->rb_node; | ||
| 424 | while (*new) { | ||
| 425 | cur = container_of(*new, struct tree_mod_elem, node); | ||
| 426 | parent = *new; | ||
| 427 | if (cur->index < tm->index) | ||
| 428 | new = &((*new)->rb_left); | ||
| 429 | else if (cur->index > tm->index) | ||
| 430 | new = &((*new)->rb_right); | ||
| 431 | else if (cur->elem.seq < tm->elem.seq) | ||
| 432 | new = &((*new)->rb_left); | ||
| 433 | else if (cur->elem.seq > tm->elem.seq) | ||
| 434 | new = &((*new)->rb_right); | ||
| 435 | else { | ||
| 436 | kfree(tm); | ||
| 437 | ret = -EEXIST; | ||
| 438 | goto unlock; | ||
| 439 | } | ||
| 440 | } | ||
| 441 | |||
| 442 | rb_link_node(&tm->node, parent, new); | ||
| 443 | rb_insert_color(&tm->node, tm_root); | ||
| 444 | unlock: | ||
| 445 | write_unlock(&fs_info->tree_mod_log_lock); | ||
| 446 | return ret; | ||
| 447 | } | ||
| 448 | |||
| 449 | int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, | ||
| 450 | struct tree_mod_elem **tm_ret) | ||
| 451 | { | ||
| 452 | struct tree_mod_elem *tm; | ||
| 453 | u64 seq = 0; | ||
| 454 | |||
| 455 | smp_mb(); | ||
| 456 | if (list_empty(&fs_info->tree_mod_seq_list)) | ||
| 457 | return 0; | ||
| 458 | |||
| 459 | tm = *tm_ret = kzalloc(sizeof(*tm), flags); | ||
| 460 | if (!tm) | ||
| 461 | return -ENOMEM; | ||
| 462 | |||
| 463 | __get_tree_mod_seq(fs_info, &tm->elem); | ||
| 464 | seq = tm->elem.seq; | ||
| 465 | tm->elem.flags = 0; | ||
| 466 | |||
| 467 | return seq; | ||
| 468 | } | ||
| 469 | |||
| 470 | static noinline int | ||
| 471 | tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, | ||
| 472 | struct extent_buffer *eb, int slot, | ||
| 473 | enum mod_log_op op, gfp_t flags) | ||
| 474 | { | ||
| 475 | struct tree_mod_elem *tm; | ||
| 476 | int ret; | ||
| 477 | |||
| 478 | ret = tree_mod_alloc(fs_info, flags, &tm); | ||
| 479 | if (ret <= 0) | ||
| 480 | return ret; | ||
| 481 | |||
| 482 | tm->index = eb->start >> PAGE_CACHE_SHIFT; | ||
| 483 | if (op != MOD_LOG_KEY_ADD) { | ||
| 484 | btrfs_node_key(eb, &tm->key, slot); | ||
| 485 | tm->blockptr = btrfs_node_blockptr(eb, slot); | ||
| 486 | } | ||
| 487 | tm->op = op; | ||
| 488 | tm->slot = slot; | ||
| 489 | tm->generation = btrfs_node_ptr_generation(eb, slot); | ||
| 490 | |||
| 491 | return __tree_mod_log_insert(fs_info, tm); | ||
| 492 | } | ||
| 493 | |||
| 494 | static noinline int | ||
| 495 | tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, | ||
| 496 | int slot, enum mod_log_op op) | ||
| 497 | { | ||
| 498 | return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS); | ||
| 499 | } | ||
| 500 | |||
| 501 | static noinline int | ||
| 502 | tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, | ||
| 503 | struct extent_buffer *eb, int dst_slot, int src_slot, | ||
| 504 | int nr_items, gfp_t flags) | ||
| 505 | { | ||
| 506 | struct tree_mod_elem *tm; | ||
| 507 | int ret; | ||
| 508 | int i; | ||
| 509 | |||
| 510 | ret = tree_mod_alloc(fs_info, flags, &tm); | ||
| 511 | if (ret <= 0) | ||
| 512 | return ret; | ||
| 513 | |||
| 514 | for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { | ||
| 515 | ret = tree_mod_log_insert_key(fs_info, eb, i + dst_slot, | ||
| 516 | MOD_LOG_KEY_REMOVE_WHILE_MOVING); | ||
| 517 | BUG_ON(ret < 0); | ||
| 518 | } | ||
| 519 | |||
| 520 | tm->index = eb->start >> PAGE_CACHE_SHIFT; | ||
| 521 | tm->slot = src_slot; | ||
| 522 | tm->move.dst_slot = dst_slot; | ||
| 523 | tm->move.nr_items = nr_items; | ||
| 524 | tm->op = MOD_LOG_MOVE_KEYS; | ||
| 525 | |||
| 526 | return __tree_mod_log_insert(fs_info, tm); | ||
| 527 | } | ||
| 528 | |||
| 529 | static noinline int | ||
| 530 | tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, | ||
| 531 | struct extent_buffer *old_root, | ||
| 532 | struct extent_buffer *new_root, gfp_t flags) | ||
| 533 | { | ||
| 534 | struct tree_mod_elem *tm; | ||
| 535 | int ret; | ||
| 536 | |||
| 537 | ret = tree_mod_alloc(fs_info, flags, &tm); | ||
| 538 | if (ret <= 0) | ||
| 539 | return ret; | ||
| 540 | |||
| 541 | tm->index = new_root->start >> PAGE_CACHE_SHIFT; | ||
| 542 | tm->old_root.logical = old_root->start; | ||
| 543 | tm->old_root.level = btrfs_header_level(old_root); | ||
| 544 | tm->generation = btrfs_header_generation(old_root); | ||
| 545 | tm->op = MOD_LOG_ROOT_REPLACE; | ||
| 546 | |||
| 547 | return __tree_mod_log_insert(fs_info, tm); | ||
| 548 | } | ||
| 549 | |||
| 550 | static struct tree_mod_elem * | ||
| 551 | __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, | ||
| 552 | int smallest) | ||
| 553 | { | ||
| 554 | struct rb_root *tm_root; | ||
| 555 | struct rb_node *node; | ||
| 556 | struct tree_mod_elem *cur = NULL; | ||
| 557 | struct tree_mod_elem *found = NULL; | ||
| 558 | u64 index = start >> PAGE_CACHE_SHIFT; | ||
| 559 | |||
| 560 | read_lock(&fs_info->tree_mod_log_lock); | ||
| 561 | tm_root = &fs_info->tree_mod_log; | ||
| 562 | node = tm_root->rb_node; | ||
| 563 | while (node) { | ||
| 564 | cur = container_of(node, struct tree_mod_elem, node); | ||
| 565 | if (cur->index < index) { | ||
| 566 | node = node->rb_left; | ||
| 567 | } else if (cur->index > index) { | ||
| 568 | node = node->rb_right; | ||
| 569 | } else if (cur->elem.seq < min_seq) { | ||
| 570 | node = node->rb_left; | ||
| 571 | } else if (!smallest) { | ||
| 572 | /* we want the node with the highest seq */ | ||
| 573 | if (found) | ||
| 574 | BUG_ON(found->elem.seq > cur->elem.seq); | ||
| 575 | found = cur; | ||
| 576 | node = node->rb_left; | ||
| 577 | } else if (cur->elem.seq > min_seq) { | ||
| 578 | /* we want the node with the smallest seq */ | ||
| 579 | if (found) | ||
| 580 | BUG_ON(found->elem.seq < cur->elem.seq); | ||
| 581 | found = cur; | ||
| 582 | node = node->rb_right; | ||
| 583 | } else { | ||
| 584 | found = cur; | ||
| 585 | break; | ||
| 586 | } | ||
| 587 | } | ||
| 588 | read_unlock(&fs_info->tree_mod_log_lock); | ||
| 589 | |||
| 590 | return found; | ||
| 591 | } | ||
| 592 | |||
| 593 | /* | ||
| 594 | * this returns the element from the log with the smallest time sequence | ||
| 595 | * value that's in the log (the oldest log item). any element with a time | ||
| 596 | * sequence lower than min_seq will be ignored. | ||
| 597 | */ | ||
| 598 | static struct tree_mod_elem * | ||
| 599 | tree_mod_log_search_oldest(struct btrfs_fs_info *fs_info, u64 start, | ||
| 600 | u64 min_seq) | ||
| 601 | { | ||
| 602 | return __tree_mod_log_search(fs_info, start, min_seq, 1); | ||
| 603 | } | ||
| 604 | |||
| 605 | /* | ||
| 606 | * this returns the element from the log with the largest time sequence | ||
| 607 | * value that's in the log (the most recent log item). any element with | ||
| 608 | * a time sequence lower than min_seq will be ignored. | ||
| 609 | */ | ||
| 610 | static struct tree_mod_elem * | ||
| 611 | tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq) | ||
| 612 | { | ||
| 613 | return __tree_mod_log_search(fs_info, start, min_seq, 0); | ||
| 614 | } | ||
| 615 | |||
| 616 | static inline void | ||
| 617 | tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, | ||
| 618 | struct extent_buffer *src, unsigned long dst_offset, | ||
| 619 | unsigned long src_offset, int nr_items) | ||
| 620 | { | ||
| 621 | int ret; | ||
| 622 | int i; | ||
| 623 | |||
| 624 | smp_mb(); | ||
| 625 | if (list_empty(&fs_info->tree_mod_seq_list)) | ||
| 626 | return; | ||
| 627 | |||
| 628 | if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) | ||
| 629 | return; | ||
| 630 | |||
| 631 | /* speed this up by single seq for all operations? */ | ||
| 632 | for (i = 0; i < nr_items; i++) { | ||
| 633 | ret = tree_mod_log_insert_key(fs_info, src, i + src_offset, | ||
| 634 | MOD_LOG_KEY_REMOVE); | ||
| 635 | BUG_ON(ret < 0); | ||
| 636 | ret = tree_mod_log_insert_key(fs_info, dst, i + dst_offset, | ||
| 637 | MOD_LOG_KEY_ADD); | ||
| 638 | BUG_ON(ret < 0); | ||
| 639 | } | ||
| 640 | } | ||
| 641 | |||
| 642 | static inline void | ||
| 643 | tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, | ||
| 644 | int dst_offset, int src_offset, int nr_items) | ||
| 645 | { | ||
| 646 | int ret; | ||
| 647 | ret = tree_mod_log_insert_move(fs_info, dst, dst_offset, src_offset, | ||
| 648 | nr_items, GFP_NOFS); | ||
| 649 | BUG_ON(ret < 0); | ||
| 650 | } | ||
| 651 | |||
| 652 | static inline void | ||
| 653 | tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info, | ||
| 654 | struct extent_buffer *eb, | ||
| 655 | struct btrfs_disk_key *disk_key, int slot, int atomic) | ||
| 656 | { | ||
| 657 | int ret; | ||
| 658 | |||
| 659 | ret = tree_mod_log_insert_key_mask(fs_info, eb, slot, | ||
| 660 | MOD_LOG_KEY_REPLACE, | ||
| 661 | atomic ? GFP_ATOMIC : GFP_NOFS); | ||
| 662 | BUG_ON(ret < 0); | ||
| 663 | } | ||
| 664 | |||
| 665 | static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, | ||
| 666 | struct extent_buffer *eb) | ||
| 667 | { | ||
| 668 | int i; | ||
| 669 | int ret; | ||
| 670 | u32 nritems; | ||
| 671 | |||
| 672 | smp_mb(); | ||
| 673 | if (list_empty(&fs_info->tree_mod_seq_list)) | ||
| 674 | return; | ||
| 675 | |||
| 676 | if (btrfs_header_level(eb) == 0) | ||
| 677 | return; | ||
| 678 | |||
| 679 | nritems = btrfs_header_nritems(eb); | ||
| 680 | for (i = nritems - 1; i >= 0; i--) { | ||
| 681 | ret = tree_mod_log_insert_key(fs_info, eb, i, | ||
| 682 | MOD_LOG_KEY_REMOVE_WHILE_FREEING); | ||
| 683 | BUG_ON(ret < 0); | ||
| 684 | } | ||
| 685 | } | ||
| 686 | |||
| 687 | static inline void | ||
| 688 | tree_mod_log_set_root_pointer(struct btrfs_root *root, | ||
| 689 | struct extent_buffer *new_root_node) | ||
| 690 | { | ||
| 691 | int ret; | ||
| 692 | tree_mod_log_free_eb(root->fs_info, root->node); | ||
| 693 | ret = tree_mod_log_insert_root(root->fs_info, root->node, | ||
| 694 | new_root_node, GFP_NOFS); | ||
| 695 | BUG_ON(ret < 0); | ||
| 696 | } | ||
| 697 | |||
| 291 | /* | 698 | /* |
| 292 | * check if the tree block can be shared by multiple trees | 699 | * check if the tree block can be shared by multiple trees |
| 293 | */ | 700 | */ |
| @@ -2271,7 +2678,6 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
| 2271 | (unsigned long)btrfs_header_chunk_tree_uuid(split), | 2678 | (unsigned long)btrfs_header_chunk_tree_uuid(split), |
| 2272 | BTRFS_UUID_SIZE); | 2679 | BTRFS_UUID_SIZE); |
| 2273 | 2680 | ||
| 2274 | |||
| 2275 | copy_extent_buffer(split, c, | 2681 | copy_extent_buffer(split, c, |
| 2276 | btrfs_node_key_ptr_offset(0), | 2682 | btrfs_node_key_ptr_offset(0), |
| 2277 | btrfs_node_key_ptr_offset(mid), | 2683 | btrfs_node_key_ptr_offset(mid), |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 01639e100045..e53bfb9b3915 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
| @@ -3114,4 +3114,9 @@ struct seq_list { | |||
| 3114 | u32 flags; | 3114 | u32 flags; |
| 3115 | }; | 3115 | }; |
| 3116 | 3116 | ||
| 3117 | void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
| 3118 | struct seq_list *elem); | ||
| 3119 | void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
| 3120 | struct seq_list *elem); | ||
| 3121 | |||
| 3117 | #endif | 3122 | #endif |
