about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Sage Weil <sage@newdream.net> 2010-09-16 19:26:51 -0400
committer: Sage Weil <sage@newdream.net> 2010-09-16 19:26:51 -0400
commit: ae00d4f37f4df56821331deb1028748110dd6dc9 (patch)
tree: 52437bde7370fc7a21193c27a08dc1d7238cee98
parent: cfc0bf6640dfd0f43bf8bfec5a475284809baa4d (diff)
ceph: fix cap_snap and realm split
The cap_snap creation/queueing relies on both the current i_head_snapc
_and_ the i_snap_realm pointers being correct, so that the new cap_snap
can properly reference the old context and the new i_head_snapc can be
updated to reference the new snaprealm's context.

To fix this, we:
 - move inodes completely to the new (split) realm so that i_snap_realm
   is correct, and
 - generate the new snapc's _before_ queueing the cap_snaps in
   ceph_update_snap_trace().

Signed-off-by: Sage Weil <sage@newdream.net>
-rw-r--r--  fs/ceph/addr.c    4
-rw-r--r--  fs/ceph/snap.c   88
-rw-r--r--  fs/ceph/super.h   2
3 files changed, 33 insertions, 61 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 50461b8c23a4..efbc604001c8 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -411,8 +411,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
411 if (i_size < page_off + len) 411 if (i_size < page_off + len)
412 len = i_size - page_off; 412 len = i_size - page_off;
413 413
414 dout("writepage %p page %p index %lu on %llu~%u\n", 414 dout("writepage %p page %p index %lu on %llu~%u snapc %p\n",
415 inode, page, page->index, page_off, len); 415 inode, page, page->index, page_off, len, snapc);
416 416
417 writeback_stat = atomic_long_inc_return(&client->writeback_count); 417 writeback_stat = atomic_long_inc_return(&client->writeback_count);
418 if (writeback_stat > 418 if (writeback_stat >
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 9e836afba341..9e6eef14b7df 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -119,6 +119,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm(
119 INIT_LIST_HEAD(&realm->children); 119 INIT_LIST_HEAD(&realm->children);
120 INIT_LIST_HEAD(&realm->child_item); 120 INIT_LIST_HEAD(&realm->child_item);
121 INIT_LIST_HEAD(&realm->empty_item); 121 INIT_LIST_HEAD(&realm->empty_item);
122 INIT_LIST_HEAD(&realm->dirty_item);
122 INIT_LIST_HEAD(&realm->inodes_with_caps); 123 INIT_LIST_HEAD(&realm->inodes_with_caps);
123 spin_lock_init(&realm->inodes_with_caps_lock); 124 spin_lock_init(&realm->inodes_with_caps_lock);
124 __insert_snap_realm(&mdsc->snap_realms, realm); 125 __insert_snap_realm(&mdsc->snap_realms, realm);
@@ -604,6 +605,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
604 struct ceph_snap_realm *realm; 605 struct ceph_snap_realm *realm;
605 int invalidate = 0; 606 int invalidate = 0;
606 int err = -ENOMEM; 607 int err = -ENOMEM;
608 LIST_HEAD(dirty_realms);
607 609
608 dout("update_snap_trace deletion=%d\n", deletion); 610 dout("update_snap_trace deletion=%d\n", deletion);
609more: 611more:
@@ -626,24 +628,6 @@ more:
626 } 628 }
627 } 629 }
628 630
629 if (le64_to_cpu(ri->seq) > realm->seq) {
630 dout("update_snap_trace updating %llx %p %lld -> %lld\n",
631 realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
632 /*
633 * if the realm seq has changed, queue a cap_snap for every
634 * inode with open caps. we do this _before_ we update
635 * the realm info so that we prepare for writeback under the
636 * _previous_ snap context.
637 *
638 * ...unless it's a snap deletion!
639 */
640 if (!deletion)
641 queue_realm_cap_snaps(realm);
642 } else {
643 dout("update_snap_trace %llx %p seq %lld unchanged\n",
644 realm->ino, realm, realm->seq);
645 }
646
647 /* ensure the parent is correct */ 631 /* ensure the parent is correct */
648 err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent)); 632 err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent));
649 if (err < 0) 633 if (err < 0)
@@ -651,6 +635,8 @@ more:
651 invalidate += err; 635 invalidate += err;
652 636
653 if (le64_to_cpu(ri->seq) > realm->seq) { 637 if (le64_to_cpu(ri->seq) > realm->seq) {
638 dout("update_snap_trace updating %llx %p %lld -> %lld\n",
639 realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
654 /* update realm parameters, snap lists */ 640 /* update realm parameters, snap lists */
655 realm->seq = le64_to_cpu(ri->seq); 641 realm->seq = le64_to_cpu(ri->seq);
656 realm->created = le64_to_cpu(ri->created); 642 realm->created = le64_to_cpu(ri->created);
@@ -668,9 +654,17 @@ more:
668 if (err < 0) 654 if (err < 0)
669 goto fail; 655 goto fail;
670 656
657 /* queue realm for cap_snap creation */
658 list_add(&realm->dirty_item, &dirty_realms);
659
671 invalidate = 1; 660 invalidate = 1;
672 } else if (!realm->cached_context) { 661 } else if (!realm->cached_context) {
662 dout("update_snap_trace %llx %p seq %lld new\n",
663 realm->ino, realm, realm->seq);
673 invalidate = 1; 664 invalidate = 1;
665 } else {
666 dout("update_snap_trace %llx %p seq %lld unchanged\n",
667 realm->ino, realm, realm->seq);
674 } 668 }
675 669
676 dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino, 670 dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino,
@@ -683,6 +677,14 @@ more:
683 if (invalidate) 677 if (invalidate)
684 rebuild_snap_realms(realm); 678 rebuild_snap_realms(realm);
685 679
680 /*
681 * queue cap snaps _after_ we've built the new snap contexts,
682 * so that i_head_snapc can be set appropriately.
683 */
684 list_for_each_entry(realm, &dirty_realms, dirty_item) {
685 queue_realm_cap_snaps(realm);
686 }
687
686 __cleanup_empty_realms(mdsc); 688 __cleanup_empty_realms(mdsc);
687 return 0; 689 return 0;
688 690
@@ -816,6 +818,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
816 }; 818 };
817 struct inode *inode = ceph_find_inode(sb, vino); 819 struct inode *inode = ceph_find_inode(sb, vino);
818 struct ceph_inode_info *ci; 820 struct ceph_inode_info *ci;
821 struct ceph_snap_realm *oldrealm;
819 822
820 if (!inode) 823 if (!inode)
821 continue; 824 continue;
@@ -841,18 +844,19 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
841 dout(" will move %p to split realm %llx %p\n", 844 dout(" will move %p to split realm %llx %p\n",
842 inode, realm->ino, realm); 845 inode, realm->ino, realm);
843 /* 846 /*
844 * Remove the inode from the realm's inode 847 * Move the inode to the new realm
845 * list, but don't add it to the new realm
846 * yet. We don't want the cap_snap to be
847 * queued (again) by ceph_update_snap_trace()
848 * below. Queue it _now_, under the old context.
849 */ 848 */
850 spin_lock(&realm->inodes_with_caps_lock); 849 spin_lock(&realm->inodes_with_caps_lock);
851 list_del_init(&ci->i_snap_realm_item); 850 list_del_init(&ci->i_snap_realm_item);
851 list_add(&ci->i_snap_realm_item,
852 &realm->inodes_with_caps);
853 oldrealm = ci->i_snap_realm;
854 ci->i_snap_realm = realm;
852 spin_unlock(&realm->inodes_with_caps_lock); 855 spin_unlock(&realm->inodes_with_caps_lock);
853 spin_unlock(&inode->i_lock); 856 spin_unlock(&inode->i_lock);
854 857
855 ceph_queue_cap_snap(ci); 858 ceph_get_snap_realm(mdsc, realm);
859 ceph_put_snap_realm(mdsc, oldrealm);
856 860
857 iput(inode); 861 iput(inode);
858 continue; 862 continue;
@@ -880,43 +884,9 @@ skip_inode:
880 ceph_update_snap_trace(mdsc, p, e, 884 ceph_update_snap_trace(mdsc, p, e,
881 op == CEPH_SNAP_OP_DESTROY); 885 op == CEPH_SNAP_OP_DESTROY);
882 886
883 if (op == CEPH_SNAP_OP_SPLIT) { 887 if (op == CEPH_SNAP_OP_SPLIT)
884 /*
885 * ok, _now_ add the inodes into the new realm.
886 */
887 for (i = 0; i < num_split_inos; i++) {
888 struct ceph_vino vino = {
889 .ino = le64_to_cpu(split_inos[i]),
890 .snap = CEPH_NOSNAP,
891 };
892 struct inode *inode = ceph_find_inode(sb, vino);
893 struct ceph_inode_info *ci;
894
895 if (!inode)
896 continue;
897 ci = ceph_inode(inode);
898 spin_lock(&inode->i_lock);
899 if (list_empty(&ci->i_snap_realm_item)) {
900 struct ceph_snap_realm *oldrealm =
901 ci->i_snap_realm;
902
903 dout(" moving %p to split realm %llx %p\n",
904 inode, realm->ino, realm);
905 spin_lock(&realm->inodes_with_caps_lock);
906 list_add(&ci->i_snap_realm_item,
907 &realm->inodes_with_caps);
908 ci->i_snap_realm = realm;
909 spin_unlock(&realm->inodes_with_caps_lock);
910 ceph_get_snap_realm(mdsc, realm);
911 ceph_put_snap_realm(mdsc, oldrealm);
912 }
913 spin_unlock(&inode->i_lock);
914 iput(inode);
915 }
916
917 /* we took a reference when we created the realm, above */ 888 /* we took a reference when we created the realm, above */
918 ceph_put_snap_realm(mdsc, realm); 889 ceph_put_snap_realm(mdsc, realm);
919 }
920 890
921 __cleanup_empty_realms(mdsc); 891 __cleanup_empty_realms(mdsc);
922 892
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index c33897ae5725..c80bfbe27b05 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -690,6 +690,8 @@ struct ceph_snap_realm {
690 690
691 struct list_head empty_item; /* if i have ref==0 */ 691 struct list_head empty_item; /* if i have ref==0 */
692 692
693 struct list_head dirty_item; /* if realm needs new context */
694
693 /* the current set of snaps for this realm */ 695 /* the current set of snaps for this realm */
694 struct ceph_snap_context *cached_context; 696 struct ceph_snap_context *cached_context;
695 697