aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Yan, Zheng <zyan@redhat.com> 2015-06-10 03:17:56 -0400
committer: Ilya Dryomov <idryomov@gmail.com> 2015-06-25 04:49:31 -0400
commit: e548e9b93d3e565e42b938a99804114565be1f81 (patch)
tree: 13de716de6a024a26ad3930312e67685299e44c6
parent: a2971c8ccb9bd7677a6c43cdbed9aacfef5e9f26 (diff)
ceph: re-send flushing caps (which are revoked) in reconnect stage
If flushing caps were revoked, we should re-send the cap flush in the client reconnect stage. This guarantees that the MDS processes the cap flush message before issuing the flushing caps to another client.

Signed-off-by: Yan, Zheng <zyan@redhat.com>
-rw-r--r-- fs/ceph/caps.c 57
-rw-r--r-- fs/ceph/mds_client.c 3
-rw-r--r-- fs/ceph/super.h 7
3 files changed, 61 insertions, 6 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 420272788e01..69a16044ec41 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1486,6 +1486,7 @@ static int __mark_caps_flushing(struct inode *inode,
1486 1486
1487 cf = kmalloc(sizeof(*cf), GFP_ATOMIC); 1487 cf = kmalloc(sizeof(*cf), GFP_ATOMIC);
1488 cf->caps = flushing; 1488 cf->caps = flushing;
1489 cf->kick = false;
1489 1490
1490 spin_lock(&mdsc->cap_dirty_lock); 1491 spin_lock(&mdsc->cap_dirty_lock);
1491 list_del_init(&ci->i_dirty_item); 1492 list_del_init(&ci->i_dirty_item);
@@ -2101,7 +2102,8 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
2101 2102
2102static int __kick_flushing_caps(struct ceph_mds_client *mdsc, 2103static int __kick_flushing_caps(struct ceph_mds_client *mdsc,
2103 struct ceph_mds_session *session, 2104 struct ceph_mds_session *session,
2104 struct ceph_inode_info *ci) 2105 struct ceph_inode_info *ci,
2106 bool kick_all)
2105{ 2107{
2106 struct inode *inode = &ci->vfs_inode; 2108 struct inode *inode = &ci->vfs_inode;
2107 struct ceph_cap *cap; 2109 struct ceph_cap *cap;
@@ -2127,7 +2129,9 @@ static int __kick_flushing_caps(struct ceph_mds_client *mdsc,
2127 2129
2128 for (n = rb_first(&ci->i_cap_flush_tree); n; n = rb_next(n)) { 2130 for (n = rb_first(&ci->i_cap_flush_tree); n; n = rb_next(n)) {
2129 cf = rb_entry(n, struct ceph_cap_flush, i_node); 2131 cf = rb_entry(n, struct ceph_cap_flush, i_node);
2130 if (cf->tid >= first_tid) 2132 if (cf->tid < first_tid)
2133 continue;
2134 if (kick_all || cf->kick)
2131 break; 2135 break;
2132 } 2136 }
2133 if (!n) { 2137 if (!n) {
@@ -2136,6 +2140,8 @@ static int __kick_flushing_caps(struct ceph_mds_client *mdsc,
2136 } 2140 }
2137 2141
2138 cf = rb_entry(n, struct ceph_cap_flush, i_node); 2142 cf = rb_entry(n, struct ceph_cap_flush, i_node);
2143 cf->kick = false;
2144
2139 first_tid = cf->tid + 1; 2145 first_tid = cf->tid + 1;
2140 2146
2141 dout("kick_flushing_caps %p cap %p tid %llu %s\n", inode, 2147 dout("kick_flushing_caps %p cap %p tid %llu %s\n", inode,
@@ -2149,6 +2155,49 @@ static int __kick_flushing_caps(struct ceph_mds_client *mdsc,
2149 return delayed; 2155 return delayed;
2150} 2156}
2151 2157
2158void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc,
2159 struct ceph_mds_session *session)
2160{
2161 struct ceph_inode_info *ci;
2162 struct ceph_cap *cap;
2163 struct ceph_cap_flush *cf;
2164 struct rb_node *n;
2165
2166 dout("early_kick_flushing_caps mds%d\n", session->s_mds);
2167 list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) {
2168 spin_lock(&ci->i_ceph_lock);
2169 cap = ci->i_auth_cap;
2170 if (!(cap && cap->session == session)) {
2171 pr_err("%p auth cap %p not mds%d ???\n",
2172 &ci->vfs_inode, cap, session->s_mds);
2173 spin_unlock(&ci->i_ceph_lock);
2174 continue;
2175 }
2176
2177
2178 /*
2179 * if flushing caps were revoked, we re-send the cap flush
2180 * in client reconnect stage. This guarantees MDS processes
2181 * the cap flush message before issuing the flushing caps to
2182 * other client.
2183 */
2184 if ((cap->issued & ci->i_flushing_caps) !=
2185 ci->i_flushing_caps) {
2186 spin_unlock(&ci->i_ceph_lock);
2187 if (!__kick_flushing_caps(mdsc, session, ci, true))
2188 continue;
2189 spin_lock(&ci->i_ceph_lock);
2190 }
2191
2192 for (n = rb_first(&ci->i_cap_flush_tree); n; n = rb_next(n)) {
2193 cf = rb_entry(n, struct ceph_cap_flush, i_node);
2194 cf->kick = true;
2195 }
2196
2197 spin_unlock(&ci->i_ceph_lock);
2198 }
2199}
2200
2152void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, 2201void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
2153 struct ceph_mds_session *session) 2202 struct ceph_mds_session *session)
2154{ 2203{
@@ -2158,7 +2207,7 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
2158 2207
2159 dout("kick_flushing_caps mds%d\n", session->s_mds); 2208 dout("kick_flushing_caps mds%d\n", session->s_mds);
2160 list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) { 2209 list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) {
2161 int delayed = __kick_flushing_caps(mdsc, session, ci); 2210 int delayed = __kick_flushing_caps(mdsc, session, ci, false);
2162 if (delayed) { 2211 if (delayed) {
2163 spin_lock(&ci->i_ceph_lock); 2212 spin_lock(&ci->i_ceph_lock);
2164 __cap_delay_requeue(mdsc, ci); 2213 __cap_delay_requeue(mdsc, ci);
@@ -2191,7 +2240,7 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
2191 2240
2192 spin_unlock(&ci->i_ceph_lock); 2241 spin_unlock(&ci->i_ceph_lock);
2193 2242
2194 delayed = __kick_flushing_caps(mdsc, session, ci); 2243 delayed = __kick_flushing_caps(mdsc, session, ci, true);
2195 if (delayed) { 2244 if (delayed) {
2196 spin_lock(&ci->i_ceph_lock); 2245 spin_lock(&ci->i_ceph_lock);
2197 __cap_delay_requeue(mdsc, ci); 2246 __cap_delay_requeue(mdsc, ci);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 31f6a78caa0a..89e4305a94d4 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2982,6 +2982,9 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
2982 2982
2983 reply->hdr.data_len = cpu_to_le32(pagelist->length); 2983 reply->hdr.data_len = cpu_to_le32(pagelist->length);
2984 ceph_msg_data_add_pagelist(reply, pagelist); 2984 ceph_msg_data_add_pagelist(reply, pagelist);
2985
2986 ceph_early_kick_flushing_caps(mdsc, session);
2987
2985 ceph_con_send(&session->s_con, reply); 2988 ceph_con_send(&session->s_con, reply);
2986 2989
2987 mutex_unlock(&session->s_mutex); 2990 mutex_unlock(&session->s_mutex);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 94d91471165f..e7f13f742357 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -189,9 +189,10 @@ static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap)
189struct ceph_cap_flush { 189struct ceph_cap_flush {
190 u64 tid; 190 u64 tid;
191 int caps; 191 int caps;
192 struct rb_node g_node; 192 bool kick;
193 struct rb_node g_node; // global
193 union { 194 union {
194 struct rb_node i_node; 195 struct rb_node i_node; // inode
195 struct list_head list; 196 struct list_head list;
196 }; 197 };
197}; 198};
@@ -868,6 +869,8 @@ extern void ceph_queue_caps_release(struct inode *inode);
868extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc); 869extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc);
869extern int ceph_fsync(struct file *file, loff_t start, loff_t end, 870extern int ceph_fsync(struct file *file, loff_t start, loff_t end,
870 int datasync); 871 int datasync);
872extern void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc,
873 struct ceph_mds_session *session);
871extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, 874extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
872 struct ceph_mds_session *session); 875 struct ceph_mds_session *session);
873extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, 876extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci,