diff options
author | Sage Weil <sage@newdream.net> | 2010-09-16 19:26:51 -0400 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2010-09-16 19:26:51 -0400 |
commit | ae00d4f37f4df56821331deb1028748110dd6dc9 (patch) | |
tree | 52437bde7370fc7a21193c27a08dc1d7238cee98 | |
parent | cfc0bf6640dfd0f43bf8bfec5a475284809baa4d (diff) |
ceph: fix cap_snap and realm split
The cap_snap creation/queueing relies on both the current i_head_snapc
_and_ the i_snap_realm pointers being correct, so that the new cap_snap
can properly reference the old context and the new i_head_snapc can be
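updated to reference the new snaprealm's context. During a realm split
this did not hold: inodes were unlinked from their realm's inode list
while i_snap_realm still pointed at the old realm, and cap_snaps were
queued before the new snap contexts had been built. To fix this, we: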
- move inodes completely to the new (split) realm so that i_snap_realm
is correct, and
- generate the new snapcs _before_ queueing the cap_snaps in
ceph_update_snap_trace().
Signed-off-by: Sage Weil <sage@newdream.net>
-rw-r--r-- | fs/ceph/addr.c | 4 | ||||
-rw-r--r-- | fs/ceph/snap.c | 88 | ||||
-rw-r--r-- | fs/ceph/super.h | 2 |
3 files changed, 33 insertions, 61 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 50461b8c23a4..efbc604001c8 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -411,8 +411,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
411 | if (i_size < page_off + len) | 411 | if (i_size < page_off + len) |
412 | len = i_size - page_off; | 412 | len = i_size - page_off; |
413 | 413 | ||
414 | dout("writepage %p page %p index %lu on %llu~%u\n", | 414 | dout("writepage %p page %p index %lu on %llu~%u snapc %p\n", |
415 | inode, page, page->index, page_off, len); | 415 | inode, page, page->index, page_off, len, snapc); |
416 | 416 | ||
417 | writeback_stat = atomic_long_inc_return(&client->writeback_count); | 417 | writeback_stat = atomic_long_inc_return(&client->writeback_count); |
418 | if (writeback_stat > | 418 | if (writeback_stat > |
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 9e836afba341..9e6eef14b7df 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c | |||
@@ -119,6 +119,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm( | |||
119 | INIT_LIST_HEAD(&realm->children); | 119 | INIT_LIST_HEAD(&realm->children); |
120 | INIT_LIST_HEAD(&realm->child_item); | 120 | INIT_LIST_HEAD(&realm->child_item); |
121 | INIT_LIST_HEAD(&realm->empty_item); | 121 | INIT_LIST_HEAD(&realm->empty_item); |
122 | INIT_LIST_HEAD(&realm->dirty_item); | ||
122 | INIT_LIST_HEAD(&realm->inodes_with_caps); | 123 | INIT_LIST_HEAD(&realm->inodes_with_caps); |
123 | spin_lock_init(&realm->inodes_with_caps_lock); | 124 | spin_lock_init(&realm->inodes_with_caps_lock); |
124 | __insert_snap_realm(&mdsc->snap_realms, realm); | 125 | __insert_snap_realm(&mdsc->snap_realms, realm); |
@@ -604,6 +605,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, | |||
604 | struct ceph_snap_realm *realm; | 605 | struct ceph_snap_realm *realm; |
605 | int invalidate = 0; | 606 | int invalidate = 0; |
606 | int err = -ENOMEM; | 607 | int err = -ENOMEM; |
608 | LIST_HEAD(dirty_realms); | ||
607 | 609 | ||
608 | dout("update_snap_trace deletion=%d\n", deletion); | 610 | dout("update_snap_trace deletion=%d\n", deletion); |
609 | more: | 611 | more: |
@@ -626,24 +628,6 @@ more: | |||
626 | } | 628 | } |
627 | } | 629 | } |
628 | 630 | ||
629 | if (le64_to_cpu(ri->seq) > realm->seq) { | ||
630 | dout("update_snap_trace updating %llx %p %lld -> %lld\n", | ||
631 | realm->ino, realm, realm->seq, le64_to_cpu(ri->seq)); | ||
632 | /* | ||
633 | * if the realm seq has changed, queue a cap_snap for every | ||
634 | * inode with open caps. we do this _before_ we update | ||
635 | * the realm info so that we prepare for writeback under the | ||
636 | * _previous_ snap context. | ||
637 | * | ||
638 | * ...unless it's a snap deletion! | ||
639 | */ | ||
640 | if (!deletion) | ||
641 | queue_realm_cap_snaps(realm); | ||
642 | } else { | ||
643 | dout("update_snap_trace %llx %p seq %lld unchanged\n", | ||
644 | realm->ino, realm, realm->seq); | ||
645 | } | ||
646 | |||
647 | /* ensure the parent is correct */ | 631 | /* ensure the parent is correct */ |
648 | err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent)); | 632 | err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent)); |
649 | if (err < 0) | 633 | if (err < 0) |
@@ -651,6 +635,8 @@ more: | |||
651 | invalidate += err; | 635 | invalidate += err; |
652 | 636 | ||
653 | if (le64_to_cpu(ri->seq) > realm->seq) { | 637 | if (le64_to_cpu(ri->seq) > realm->seq) { |
638 | dout("update_snap_trace updating %llx %p %lld -> %lld\n", | ||
639 | realm->ino, realm, realm->seq, le64_to_cpu(ri->seq)); | ||
654 | /* update realm parameters, snap lists */ | 640 | /* update realm parameters, snap lists */ |
655 | realm->seq = le64_to_cpu(ri->seq); | 641 | realm->seq = le64_to_cpu(ri->seq); |
656 | realm->created = le64_to_cpu(ri->created); | 642 | realm->created = le64_to_cpu(ri->created); |
@@ -668,9 +654,17 @@ more: | |||
668 | if (err < 0) | 654 | if (err < 0) |
669 | goto fail; | 655 | goto fail; |
670 | 656 | ||
657 | /* queue realm for cap_snap creation */ | ||
658 | list_add(&realm->dirty_item, &dirty_realms); | ||
659 | |||
671 | invalidate = 1; | 660 | invalidate = 1; |
672 | } else if (!realm->cached_context) { | 661 | } else if (!realm->cached_context) { |
662 | dout("update_snap_trace %llx %p seq %lld new\n", | ||
663 | realm->ino, realm, realm->seq); | ||
673 | invalidate = 1; | 664 | invalidate = 1; |
665 | } else { | ||
666 | dout("update_snap_trace %llx %p seq %lld unchanged\n", | ||
667 | realm->ino, realm, realm->seq); | ||
674 | } | 668 | } |
675 | 669 | ||
676 | dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino, | 670 | dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino, |
@@ -683,6 +677,14 @@ more: | |||
683 | if (invalidate) | 677 | if (invalidate) |
684 | rebuild_snap_realms(realm); | 678 | rebuild_snap_realms(realm); |
685 | 679 | ||
680 | /* | ||
681 | * queue cap snaps _after_ we've built the new snap contexts, | ||
682 | * so that i_head_snapc can be set appropriately. | ||
683 | */ | ||
684 | list_for_each_entry(realm, &dirty_realms, dirty_item) { | ||
685 | queue_realm_cap_snaps(realm); | ||
686 | } | ||
687 | |||
686 | __cleanup_empty_realms(mdsc); | 688 | __cleanup_empty_realms(mdsc); |
687 | return 0; | 689 | return 0; |
688 | 690 | ||
@@ -816,6 +818,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, | |||
816 | }; | 818 | }; |
817 | struct inode *inode = ceph_find_inode(sb, vino); | 819 | struct inode *inode = ceph_find_inode(sb, vino); |
818 | struct ceph_inode_info *ci; | 820 | struct ceph_inode_info *ci; |
821 | struct ceph_snap_realm *oldrealm; | ||
819 | 822 | ||
820 | if (!inode) | 823 | if (!inode) |
821 | continue; | 824 | continue; |
@@ -841,18 +844,19 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, | |||
841 | dout(" will move %p to split realm %llx %p\n", | 844 | dout(" will move %p to split realm %llx %p\n", |
842 | inode, realm->ino, realm); | 845 | inode, realm->ino, realm); |
843 | /* | 846 | /* |
844 | * Remove the inode from the realm's inode | 847 | * Move the inode to the new realm |
845 | * list, but don't add it to the new realm | ||
846 | * yet. We don't want the cap_snap to be | ||
847 | * queued (again) by ceph_update_snap_trace() | ||
848 | * below. Queue it _now_, under the old context. | ||
849 | */ | 848 | */ |
850 | spin_lock(&realm->inodes_with_caps_lock); | 849 | spin_lock(&realm->inodes_with_caps_lock); |
851 | list_del_init(&ci->i_snap_realm_item); | 850 | list_del_init(&ci->i_snap_realm_item); |
851 | list_add(&ci->i_snap_realm_item, | ||
852 | &realm->inodes_with_caps); | ||
853 | oldrealm = ci->i_snap_realm; | ||
854 | ci->i_snap_realm = realm; | ||
852 | spin_unlock(&realm->inodes_with_caps_lock); | 855 | spin_unlock(&realm->inodes_with_caps_lock); |
853 | spin_unlock(&inode->i_lock); | 856 | spin_unlock(&inode->i_lock); |
854 | 857 | ||
855 | ceph_queue_cap_snap(ci); | 858 | ceph_get_snap_realm(mdsc, realm); |
859 | ceph_put_snap_realm(mdsc, oldrealm); | ||
856 | 860 | ||
857 | iput(inode); | 861 | iput(inode); |
858 | continue; | 862 | continue; |
@@ -880,43 +884,9 @@ skip_inode: | |||
880 | ceph_update_snap_trace(mdsc, p, e, | 884 | ceph_update_snap_trace(mdsc, p, e, |
881 | op == CEPH_SNAP_OP_DESTROY); | 885 | op == CEPH_SNAP_OP_DESTROY); |
882 | 886 | ||
883 | if (op == CEPH_SNAP_OP_SPLIT) { | 887 | if (op == CEPH_SNAP_OP_SPLIT) |
884 | /* | ||
885 | * ok, _now_ add the inodes into the new realm. | ||
886 | */ | ||
887 | for (i = 0; i < num_split_inos; i++) { | ||
888 | struct ceph_vino vino = { | ||
889 | .ino = le64_to_cpu(split_inos[i]), | ||
890 | .snap = CEPH_NOSNAP, | ||
891 | }; | ||
892 | struct inode *inode = ceph_find_inode(sb, vino); | ||
893 | struct ceph_inode_info *ci; | ||
894 | |||
895 | if (!inode) | ||
896 | continue; | ||
897 | ci = ceph_inode(inode); | ||
898 | spin_lock(&inode->i_lock); | ||
899 | if (list_empty(&ci->i_snap_realm_item)) { | ||
900 | struct ceph_snap_realm *oldrealm = | ||
901 | ci->i_snap_realm; | ||
902 | |||
903 | dout(" moving %p to split realm %llx %p\n", | ||
904 | inode, realm->ino, realm); | ||
905 | spin_lock(&realm->inodes_with_caps_lock); | ||
906 | list_add(&ci->i_snap_realm_item, | ||
907 | &realm->inodes_with_caps); | ||
908 | ci->i_snap_realm = realm; | ||
909 | spin_unlock(&realm->inodes_with_caps_lock); | ||
910 | ceph_get_snap_realm(mdsc, realm); | ||
911 | ceph_put_snap_realm(mdsc, oldrealm); | ||
912 | } | ||
913 | spin_unlock(&inode->i_lock); | ||
914 | iput(inode); | ||
915 | } | ||
916 | |||
917 | /* we took a reference when we created the realm, above */ | 888 | /* we took a reference when we created the realm, above */ |
918 | ceph_put_snap_realm(mdsc, realm); | 889 | ceph_put_snap_realm(mdsc, realm); |
919 | } | ||
920 | 890 | ||
921 | __cleanup_empty_realms(mdsc); | 891 | __cleanup_empty_realms(mdsc); |
922 | 892 | ||
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index c33897ae5725..c80bfbe27b05 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -690,6 +690,8 @@ struct ceph_snap_realm { | |||
690 | 690 | ||
691 | struct list_head empty_item; /* if i have ref==0 */ | 691 | struct list_head empty_item; /* if i have ref==0 */ |
692 | 692 | ||
693 | struct list_head dirty_item; /* if realm needs new context */ | ||
694 | |||
693 | /* the current set of snaps for this realm */ | 695 | /* the current set of snaps for this realm */ |
694 | struct ceph_snap_context *cached_context; | 696 | struct ceph_snap_context *cached_context; |
695 | 697 | ||