aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Sage Weil <sage@newdream.net> 2010-03-01 18:16:56 -0500
committer: Sage Weil <sage@newdream.net> 2010-03-01 18:28:02 -0500
commit: e9964c102312967a4bc1fd501cb628c4a3b19034 (patch)
tree: 7bccb0c4e29d94baaf8c30e008ec5aebdccf9c57
parent: 7af8f1e4aa86720840d3318e4dc225c3c7e5a6d0 (diff)
ceph: fix flush_dirty_caps race with caps migration
flush_dirty_caps() used to loop on the first entry of the cap_dirty list, on the assumption that calling ceph_check_caps() would remove that entry from the list. This isn't true for caps that are being migrated between MDSs, where we've received the EXPORT but not the IMPORT. Instead, do a safe list iteration, and pin the next inode on the list via the CEPH_I_NOFLUSH flag.
Signed-off-by: Sage Weil <sage@newdream.net>
-rw-r--r-- fs/ceph/caps.c 45
-rw-r--r-- fs/ceph/super.h 1
2 files changed, 39 insertions, 7 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 295b7e547a31..8b89b9123252 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1573,6 +1573,11 @@ retry_locked:
1573 } 1573 }
1574 1574
1575ack: 1575ack:
1576 if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
1577 dout(" skipping %p I_NOFLUSH set\n", inode);
1578 continue;
1579 }
1580
1576 if (session && session != cap->session) { 1581 if (session && session != cap->session) {
1577 dout("oops, wrong session %p mutex\n", session); 1582 dout("oops, wrong session %p mutex\n", session);
1578 mutex_unlock(&session->s_mutex); 1583 mutex_unlock(&session->s_mutex);
@@ -1652,6 +1657,10 @@ static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session,
1652 1657
1653retry: 1658retry:
1654 spin_lock(&inode->i_lock); 1659 spin_lock(&inode->i_lock);
1660 if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
1661 dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode);
1662 goto out;
1663 }
1655 if (ci->i_dirty_caps && ci->i_auth_cap) { 1664 if (ci->i_dirty_caps && ci->i_auth_cap) {
1656 struct ceph_cap *cap = ci->i_auth_cap; 1665 struct ceph_cap *cap = ci->i_auth_cap;
1657 int used = __ceph_caps_used(ci); 1666 int used = __ceph_caps_used(ci);
@@ -2747,16 +2756,38 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
2747 */ 2756 */
2748void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) 2757void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
2749{ 2758{
2750 struct ceph_inode_info *ci; 2759 struct ceph_inode_info *ci, *nci = NULL;
2751 struct inode *inode; 2760 struct inode *inode, *ninode = NULL;
2761 struct list_head *p, *n;
2752 2762
2753 dout("flush_dirty_caps\n"); 2763 dout("flush_dirty_caps\n");
2754 spin_lock(&mdsc->cap_dirty_lock); 2764 spin_lock(&mdsc->cap_dirty_lock);
2755 while (!list_empty(&mdsc->cap_dirty)) { 2765 list_for_each_safe(p, n, &mdsc->cap_dirty) {
2756 ci = list_first_entry(&mdsc->cap_dirty, 2766 if (nci) {
2757 struct ceph_inode_info, 2767 ci = nci;
2758 i_dirty_item); 2768 inode = ninode;
2759 inode = igrab(&ci->vfs_inode); 2769 ci->i_ceph_flags &= ~CEPH_I_NOFLUSH;
2770 dout("flush_dirty_caps inode %p (was next inode)\n",
2771 inode);
2772 } else {
2773 ci = list_entry(p, struct ceph_inode_info,
2774 i_dirty_item);
2775 inode = igrab(&ci->vfs_inode);
2776 BUG_ON(!inode);
2777 dout("flush_dirty_caps inode %p\n", inode);
2778 }
2779 if (n != &mdsc->cap_dirty) {
2780 nci = list_entry(n, struct ceph_inode_info,
2781 i_dirty_item);
2782 ninode = igrab(&nci->vfs_inode);
2783 BUG_ON(!ninode);
2784 nci->i_ceph_flags |= CEPH_I_NOFLUSH;
2785 dout("flush_dirty_caps next inode %p, noflush\n",
2786 ninode);
2787 } else {
2788 nci = NULL;
2789 ninode = NULL;
2790 }
2760 spin_unlock(&mdsc->cap_dirty_lock); 2791 spin_unlock(&mdsc->cap_dirty_lock);
2761 if (inode) { 2792 if (inode) {
2762 ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, 2793 ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH,
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index ff7aaa32736c..6a778f2c3f6e 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -289,6 +289,7 @@ struct ceph_inode_xattrs_info {
289#define CEPH_I_COMPLETE 1 /* we have complete directory cached */ 289#define CEPH_I_COMPLETE 1 /* we have complete directory cached */
290#define CEPH_I_NODELAY 4 /* do not delay cap release */ 290#define CEPH_I_NODELAY 4 /* do not delay cap release */
291#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ 291#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */
292#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */
292 293
293struct ceph_inode_info { 294struct ceph_inode_info {
294 struct ceph_vino i_vino; /* ceph ino + snap */ 295 struct ceph_vino i_vino; /* ceph ino + snap */