diff options
author | Sage Weil <sage@newdream.net> | 2010-03-01 18:16:56 -0500 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2010-03-01 18:28:02 -0500 |
commit | e9964c102312967a4bc1fd501cb628c4a3b19034 (patch) | |
tree | 7bccb0c4e29d94baaf8c30e008ec5aebdccf9c57 | |
parent | 7af8f1e4aa86720840d3318e4dc225c3c7e5a6d0 (diff) |
ceph: fix flush_dirty_caps race with caps migration
flush_dirty_caps() used to loop over the first entry of the cap_dirty
list on the assumption that after calling ceph_check_caps() it would
be removed from the list. This isn't true for caps that are being
migrated between MDSs, where we've received the EXPORT but not the IMPORT.
Instead, do a safe list iteration, and pin the next inode on the list via
the CEPH_I_NOFLUSH flag.
Signed-off-by: Sage Weil <sage@newdream.net>
-rw-r--r-- | fs/ceph/caps.c | 45 | ||||
-rw-r--r-- | fs/ceph/super.h | 1 |
2 files changed, 39 insertions, 7 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 295b7e547a31..8b89b9123252 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -1573,6 +1573,11 @@ retry_locked: | |||
1573 | } | 1573 | } |
1574 | 1574 | ||
1575 | ack: | 1575 | ack: |
1576 | if (ci->i_ceph_flags & CEPH_I_NOFLUSH) { | ||
1577 | dout(" skipping %p I_NOFLUSH set\n", inode); | ||
1578 | continue; | ||
1579 | } | ||
1580 | |||
1576 | if (session && session != cap->session) { | 1581 | if (session && session != cap->session) { |
1577 | dout("oops, wrong session %p mutex\n", session); | 1582 | dout("oops, wrong session %p mutex\n", session); |
1578 | mutex_unlock(&session->s_mutex); | 1583 | mutex_unlock(&session->s_mutex); |
@@ -1652,6 +1657,10 @@ static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, | |||
1652 | 1657 | ||
1653 | retry: | 1658 | retry: |
1654 | spin_lock(&inode->i_lock); | 1659 | spin_lock(&inode->i_lock); |
1660 | if (ci->i_ceph_flags & CEPH_I_NOFLUSH) { | ||
1661 | dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode); | ||
1662 | goto out; | ||
1663 | } | ||
1655 | if (ci->i_dirty_caps && ci->i_auth_cap) { | 1664 | if (ci->i_dirty_caps && ci->i_auth_cap) { |
1656 | struct ceph_cap *cap = ci->i_auth_cap; | 1665 | struct ceph_cap *cap = ci->i_auth_cap; |
1657 | int used = __ceph_caps_used(ci); | 1666 | int used = __ceph_caps_used(ci); |
@@ -2747,16 +2756,38 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc) | |||
2747 | */ | 2756 | */ |
2748 | void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) | 2757 | void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) |
2749 | { | 2758 | { |
2750 | struct ceph_inode_info *ci; | 2759 | struct ceph_inode_info *ci, *nci = NULL; |
2751 | struct inode *inode; | 2760 | struct inode *inode, *ninode = NULL; |
2761 | struct list_head *p, *n; | ||
2752 | 2762 | ||
2753 | dout("flush_dirty_caps\n"); | 2763 | dout("flush_dirty_caps\n"); |
2754 | spin_lock(&mdsc->cap_dirty_lock); | 2764 | spin_lock(&mdsc->cap_dirty_lock); |
2755 | while (!list_empty(&mdsc->cap_dirty)) { | 2765 | list_for_each_safe(p, n, &mdsc->cap_dirty) { |
2756 | ci = list_first_entry(&mdsc->cap_dirty, | 2766 | if (nci) { |
2757 | struct ceph_inode_info, | 2767 | ci = nci; |
2758 | i_dirty_item); | 2768 | inode = ninode; |
2759 | inode = igrab(&ci->vfs_inode); | 2769 | ci->i_ceph_flags &= ~CEPH_I_NOFLUSH; |
2770 | dout("flush_dirty_caps inode %p (was next inode)\n", | ||
2771 | inode); | ||
2772 | } else { | ||
2773 | ci = list_entry(p, struct ceph_inode_info, | ||
2774 | i_dirty_item); | ||
2775 | inode = igrab(&ci->vfs_inode); | ||
2776 | BUG_ON(!inode); | ||
2777 | dout("flush_dirty_caps inode %p\n", inode); | ||
2778 | } | ||
2779 | if (n != &mdsc->cap_dirty) { | ||
2780 | nci = list_entry(n, struct ceph_inode_info, | ||
2781 | i_dirty_item); | ||
2782 | ninode = igrab(&nci->vfs_inode); | ||
2783 | BUG_ON(!ninode); | ||
2784 | nci->i_ceph_flags |= CEPH_I_NOFLUSH; | ||
2785 | dout("flush_dirty_caps next inode %p, noflush\n", | ||
2786 | ninode); | ||
2787 | } else { | ||
2788 | nci = NULL; | ||
2789 | ninode = NULL; | ||
2790 | } | ||
2760 | spin_unlock(&mdsc->cap_dirty_lock); | 2791 | spin_unlock(&mdsc->cap_dirty_lock); |
2761 | if (inode) { | 2792 | if (inode) { |
2762 | ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, | 2793 | ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index ff7aaa32736c..6a778f2c3f6e 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -289,6 +289,7 @@ struct ceph_inode_xattrs_info { | |||
289 | #define CEPH_I_COMPLETE 1 /* we have complete directory cached */ | 289 | #define CEPH_I_COMPLETE 1 /* we have complete directory cached */ |
290 | #define CEPH_I_NODELAY 4 /* do not delay cap release */ | 290 | #define CEPH_I_NODELAY 4 /* do not delay cap release */ |
291 | #define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ | 291 | #define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ |
292 | #define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */ | ||
292 | 293 | ||
293 | struct ceph_inode_info { | 294 | struct ceph_inode_info { |
294 | struct ceph_vino i_vino; /* ceph ino + snap */ | 295 | struct ceph_vino i_vino; /* ceph ino + snap */ |