diff options
author | Yan, Zheng <zyan@redhat.com> | 2016-07-04 06:06:41 -0400 |
---|---|---|
committer | Ilya Dryomov <idryomov@gmail.com> | 2016-07-27 21:00:42 -0400 |
commit | 0e2943878942aee7100c94d0d40c49087dac12cb (patch) | |
tree | 8611ba2813ab13fd5a36a70dbd5005b29818d40e /fs/ceph/caps.c | |
parent | e4500b5e35c213e0f97be7cb69328c0877203a79 (diff) |
ceph: unify cap flush and snapcap flush
This patch includes the following changes:
- Assign flush tid to snapcap flush
- Remove the session's s_cap_snaps_flushing list. Add the inode to the
session's s_cap_flushing list instead. The inode is removed from the list
when there is no pending snapcap flush or cap flush.
- Make __kick_flushing_caps() re-send both snapcap flushes and cap
flushes.
Signed-off-by: Yan, Zheng <zyan@redhat.com>
Diffstat (limited to 'fs/ceph/caps.c')
-rw-r--r-- | fs/ceph/caps.c | 291 |
1 files changed, 156 insertions, 135 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index e0efa75a1b98..0ac604719663 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -40,6 +40,7 @@ | |||
40 | * cluster to release server state. | 40 | * cluster to release server state. |
41 | */ | 41 | */ |
42 | 42 | ||
43 | static u64 __get_oldest_flush_tid(struct ceph_mds_client *mdsc); | ||
43 | 44 | ||
44 | /* | 45 | /* |
45 | * Generate readable cap strings for debugging output. | 46 | * Generate readable cap strings for debugging output. |
@@ -1217,6 +1218,22 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
1217 | return delayed; | 1218 | return delayed; |
1218 | } | 1219 | } |
1219 | 1220 | ||
1221 | static inline int __send_flush_snap(struct inode *inode, | ||
1222 | struct ceph_mds_session *session, | ||
1223 | struct ceph_cap_snap *capsnap, | ||
1224 | u32 mseq, u64 oldest_flush_tid) | ||
1225 | { | ||
1226 | return send_cap_msg(session, ceph_vino(inode).ino, 0, | ||
1227 | CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, | ||
1228 | capsnap->dirty, 0, capsnap->cap_flush.tid, | ||
1229 | oldest_flush_tid, 0, mseq, capsnap->size, 0, | ||
1230 | &capsnap->mtime, &capsnap->atime, | ||
1231 | &capsnap->ctime, capsnap->time_warp_seq, | ||
1232 | capsnap->uid, capsnap->gid, capsnap->mode, | ||
1233 | capsnap->xattr_version, capsnap->xattr_blob, | ||
1234 | capsnap->follows, capsnap->inline_data); | ||
1235 | } | ||
1236 | |||
1220 | /* | 1237 | /* |
1221 | * When a snapshot is taken, clients accumulate dirty metadata on | 1238 | * When a snapshot is taken, clients accumulate dirty metadata on |
1222 | * inodes with capabilities in ceph_cap_snaps to describe the file | 1239 | * inodes with capabilities in ceph_cap_snaps to describe the file |
@@ -1224,14 +1241,10 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
1224 | * asynchronously back to the MDS once sync writes complete and dirty | 1241 | * asynchronously back to the MDS once sync writes complete and dirty |
1225 | * data is written out. | 1242 | * data is written out. |
1226 | * | 1243 | * |
1227 | * Unless @kick is true, skip cap_snaps that were already sent to | ||
1228 | * the MDS (i.e., during this session). | ||
1229 | * | ||
1230 | * Called under i_ceph_lock. Takes s_mutex as needed. | 1244 | * Called under i_ceph_lock. Takes s_mutex as needed. |
1231 | */ | 1245 | */ |
1232 | void __ceph_flush_snaps(struct ceph_inode_info *ci, | 1246 | void __ceph_flush_snaps(struct ceph_inode_info *ci, |
1233 | struct ceph_mds_session **psession, | 1247 | struct ceph_mds_session **psession) |
1234 | int kick) | ||
1235 | __releases(ci->i_ceph_lock) | 1248 | __releases(ci->i_ceph_lock) |
1236 | __acquires(ci->i_ceph_lock) | 1249 | __acquires(ci->i_ceph_lock) |
1237 | { | 1250 | { |
@@ -1242,6 +1255,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci, | |||
1242 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; | 1255 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
1243 | struct ceph_mds_session *session = NULL; /* if session != NULL, we hold | 1256 | struct ceph_mds_session *session = NULL; /* if session != NULL, we hold |
1244 | session->s_mutex */ | 1257 | session->s_mutex */ |
1258 | u64 oldest_flush_tid; | ||
1245 | u64 next_follows = 0; /* keep track of how far we've gotten through the | 1259 | u64 next_follows = 0; /* keep track of how far we've gotten through the |
1246 | i_cap_snaps list, and skip these entries next time | 1260 | i_cap_snaps list, and skip these entries next time |
1247 | around to avoid an infinite loop */ | 1261 | around to avoid an infinite loop */ |
@@ -1272,7 +1286,7 @@ retry: | |||
1272 | } | 1286 | } |
1273 | 1287 | ||
1274 | /* only flush each capsnap once */ | 1288 | /* only flush each capsnap once */ |
1275 | if (!kick && !list_empty(&capsnap->flushing_item)) { | 1289 | if (capsnap->cap_flush.tid > 0) { |
1276 | dout("already flushed %p, skipping\n", capsnap); | 1290 | dout("already flushed %p, skipping\n", capsnap); |
1277 | continue; | 1291 | continue; |
1278 | } | 1292 | } |
@@ -1282,8 +1296,6 @@ retry: | |||
1282 | 1296 | ||
1283 | if (session && session->s_mds != mds) { | 1297 | if (session && session->s_mds != mds) { |
1284 | dout("oops, wrong session %p mutex\n", session); | 1298 | dout("oops, wrong session %p mutex\n", session); |
1285 | if (kick) | ||
1286 | goto out; | ||
1287 | 1299 | ||
1288 | mutex_unlock(&session->s_mutex); | 1300 | mutex_unlock(&session->s_mutex); |
1289 | ceph_put_mds_session(session); | 1301 | ceph_put_mds_session(session); |
@@ -1309,26 +1321,27 @@ retry: | |||
1309 | } | 1321 | } |
1310 | 1322 | ||
1311 | spin_lock(&mdsc->cap_dirty_lock); | 1323 | spin_lock(&mdsc->cap_dirty_lock); |
1312 | capsnap->flush_tid = ++mdsc->last_cap_flush_tid; | 1324 | capsnap->cap_flush.tid = ++mdsc->last_cap_flush_tid; |
1325 | list_add_tail(&capsnap->cap_flush.g_list, | ||
1326 | &mdsc->cap_flush_list); | ||
1327 | oldest_flush_tid = __get_oldest_flush_tid(mdsc); | ||
1328 | |||
1329 | if (list_empty(&ci->i_flushing_item)) { | ||
1330 | list_add_tail(&ci->i_flushing_item, | ||
1331 | &session->s_cap_flushing); | ||
1332 | } | ||
1313 | spin_unlock(&mdsc->cap_dirty_lock); | 1333 | spin_unlock(&mdsc->cap_dirty_lock); |
1314 | 1334 | ||
1335 | list_add_tail(&capsnap->cap_flush.i_list, | ||
1336 | &ci->i_cap_flush_list); | ||
1337 | |||
1315 | atomic_inc(&capsnap->nref); | 1338 | atomic_inc(&capsnap->nref); |
1316 | if (list_empty(&capsnap->flushing_item)) | ||
1317 | list_add_tail(&capsnap->flushing_item, | ||
1318 | &session->s_cap_snaps_flushing); | ||
1319 | spin_unlock(&ci->i_ceph_lock); | 1339 | spin_unlock(&ci->i_ceph_lock); |
1320 | 1340 | ||
1321 | dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n", | 1341 | dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n", |
1322 | inode, capsnap, capsnap->follows, capsnap->flush_tid); | 1342 | inode, capsnap, capsnap->follows, capsnap->cap_flush.tid); |
1323 | send_cap_msg(session, ceph_vino(inode).ino, 0, | 1343 | __send_flush_snap(inode, session, capsnap, mseq, |
1324 | CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, | 1344 | oldest_flush_tid); |
1325 | capsnap->dirty, 0, capsnap->flush_tid, 0, | ||
1326 | 0, mseq, capsnap->size, 0, | ||
1327 | &capsnap->mtime, &capsnap->atime, | ||
1328 | &capsnap->ctime, capsnap->time_warp_seq, | ||
1329 | capsnap->uid, capsnap->gid, capsnap->mode, | ||
1330 | capsnap->xattr_version, capsnap->xattr_blob, | ||
1331 | capsnap->follows, capsnap->inline_data); | ||
1332 | 1345 | ||
1333 | next_follows = capsnap->follows + 1; | 1346 | next_follows = capsnap->follows + 1; |
1334 | ceph_put_cap_snap(capsnap); | 1347 | ceph_put_cap_snap(capsnap); |
@@ -1354,7 +1367,7 @@ out: | |||
1354 | static void ceph_flush_snaps(struct ceph_inode_info *ci) | 1367 | static void ceph_flush_snaps(struct ceph_inode_info *ci) |
1355 | { | 1368 | { |
1356 | spin_lock(&ci->i_ceph_lock); | 1369 | spin_lock(&ci->i_ceph_lock); |
1357 | __ceph_flush_snaps(ci, NULL, 0); | 1370 | __ceph_flush_snaps(ci, NULL); |
1358 | spin_unlock(&ci->i_ceph_lock); | 1371 | spin_unlock(&ci->i_ceph_lock); |
1359 | } | 1372 | } |
1360 | 1373 | ||
@@ -1476,11 +1489,6 @@ static int __mark_caps_flushing(struct inode *inode, | |||
1476 | if (list_empty(&ci->i_flushing_item)) { | 1489 | if (list_empty(&ci->i_flushing_item)) { |
1477 | list_add_tail(&ci->i_flushing_item, &session->s_cap_flushing); | 1490 | list_add_tail(&ci->i_flushing_item, &session->s_cap_flushing); |
1478 | mdsc->num_cap_flushing++; | 1491 | mdsc->num_cap_flushing++; |
1479 | dout(" inode %p now flushing tid %llu\n", inode, cf->tid); | ||
1480 | } else { | ||
1481 | list_move_tail(&ci->i_flushing_item, &session->s_cap_flushing); | ||
1482 | dout(" inode %p now flushing (more) tid %llu\n", | ||
1483 | inode, cf->tid); | ||
1484 | } | 1492 | } |
1485 | spin_unlock(&mdsc->cap_dirty_lock); | 1493 | spin_unlock(&mdsc->cap_dirty_lock); |
1486 | 1494 | ||
@@ -1556,7 +1564,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, | |||
1556 | 1564 | ||
1557 | /* flush snaps first time around only */ | 1565 | /* flush snaps first time around only */ |
1558 | if (!list_empty(&ci->i_cap_snaps)) | 1566 | if (!list_empty(&ci->i_cap_snaps)) |
1559 | __ceph_flush_snaps(ci, &session, 0); | 1567 | __ceph_flush_snaps(ci, &session); |
1560 | goto retry_locked; | 1568 | goto retry_locked; |
1561 | retry: | 1569 | retry: |
1562 | spin_lock(&ci->i_ceph_lock); | 1570 | spin_lock(&ci->i_ceph_lock); |
@@ -1997,80 +2005,74 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
1997 | return err; | 2005 | return err; |
1998 | } | 2006 | } |
1999 | 2007 | ||
2000 | /* | 2008 | static void __kick_flushing_caps(struct ceph_mds_client *mdsc, |
2001 | * After a recovering MDS goes active, we need to resend any caps | 2009 | struct ceph_mds_session *session, |
2002 | * we were flushing. | 2010 | struct ceph_inode_info *ci, |
2003 | * | 2011 | u64 oldest_flush_tid) |
2004 | * Caller holds session->s_mutex. | 2012 | __releases(ci->i_ceph_lock) |
2005 | */ | 2013 | __acquires(ci->i_ceph_lock) |
2006 | static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc, | ||
2007 | struct ceph_mds_session *session) | ||
2008 | { | ||
2009 | struct ceph_cap_snap *capsnap; | ||
2010 | |||
2011 | dout("kick_flushing_capsnaps mds%d\n", session->s_mds); | ||
2012 | list_for_each_entry(capsnap, &session->s_cap_snaps_flushing, | ||
2013 | flushing_item) { | ||
2014 | struct ceph_inode_info *ci = capsnap->ci; | ||
2015 | struct inode *inode = &ci->vfs_inode; | ||
2016 | struct ceph_cap *cap; | ||
2017 | |||
2018 | spin_lock(&ci->i_ceph_lock); | ||
2019 | cap = ci->i_auth_cap; | ||
2020 | if (cap && cap->session == session) { | ||
2021 | dout("kick_flushing_caps %p cap %p capsnap %p\n", inode, | ||
2022 | cap, capsnap); | ||
2023 | __ceph_flush_snaps(ci, &session, 1); | ||
2024 | } else { | ||
2025 | pr_err("%p auth cap %p not mds%d ???\n", inode, | ||
2026 | cap, session->s_mds); | ||
2027 | } | ||
2028 | spin_unlock(&ci->i_ceph_lock); | ||
2029 | } | ||
2030 | } | ||
2031 | |||
2032 | static int __kick_flushing_caps(struct ceph_mds_client *mdsc, | ||
2033 | struct ceph_mds_session *session, | ||
2034 | struct ceph_inode_info *ci) | ||
2035 | { | 2014 | { |
2036 | struct inode *inode = &ci->vfs_inode; | 2015 | struct inode *inode = &ci->vfs_inode; |
2037 | struct ceph_cap *cap; | 2016 | struct ceph_cap *cap; |
2038 | struct ceph_cap_flush *cf; | 2017 | struct ceph_cap_flush *cf; |
2039 | int delayed = 0; | 2018 | int ret; |
2040 | u64 first_tid = 0; | 2019 | u64 first_tid = 0; |
2041 | u64 oldest_flush_tid; | ||
2042 | |||
2043 | spin_lock(&mdsc->cap_dirty_lock); | ||
2044 | oldest_flush_tid = __get_oldest_flush_tid(mdsc); | ||
2045 | spin_unlock(&mdsc->cap_dirty_lock); | ||
2046 | 2020 | ||
2047 | spin_lock(&ci->i_ceph_lock); | ||
2048 | list_for_each_entry(cf, &ci->i_cap_flush_list, i_list) { | 2021 | list_for_each_entry(cf, &ci->i_cap_flush_list, i_list) { |
2049 | if (cf->tid < first_tid) | 2022 | if (cf->tid < first_tid) |
2050 | continue; | 2023 | continue; |
2051 | 2024 | ||
2052 | cap = ci->i_auth_cap; | 2025 | cap = ci->i_auth_cap; |
2053 | if (!(cap && cap->session == session)) { | 2026 | if (!(cap && cap->session == session)) { |
2054 | pr_err("%p auth cap %p not mds%d ???\n", inode, | 2027 | pr_err("%p auth cap %p not mds%d ???\n", |
2055 | cap, session->s_mds); | 2028 | inode, cap, session->s_mds); |
2056 | spin_unlock(&ci->i_ceph_lock); | ||
2057 | break; | 2029 | break; |
2058 | } | 2030 | } |
2059 | 2031 | ||
2060 | first_tid = cf->tid + 1; | 2032 | first_tid = cf->tid + 1; |
2061 | 2033 | ||
2062 | dout("kick_flushing_caps %p cap %p tid %llu %s\n", inode, | 2034 | if (cf->caps) { |
2063 | cap, cf->tid, ceph_cap_string(cf->caps)); | 2035 | dout("kick_flushing_caps %p cap %p tid %llu %s\n", |
2064 | delayed |= __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, | 2036 | inode, cap, cf->tid, ceph_cap_string(cf->caps)); |
2065 | __ceph_caps_used(ci), | 2037 | ci->i_ceph_flags |= CEPH_I_NODELAY; |
2066 | __ceph_caps_wanted(ci), | 2038 | ret = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, |
2067 | cap->issued | cap->implemented, | 2039 | __ceph_caps_used(ci), |
2068 | cf->caps, cf->tid, oldest_flush_tid); | 2040 | __ceph_caps_wanted(ci), |
2041 | cap->issued | cap->implemented, | ||
2042 | cf->caps, cf->tid, oldest_flush_tid); | ||
2043 | if (ret) { | ||
2044 | pr_err("kick_flushing_caps: error sending " | ||
2045 | "cap flush, ino (%llx.%llx) " | ||
2046 | "tid %llu flushing %s\n", | ||
2047 | ceph_vinop(inode), cf->tid, | ||
2048 | ceph_cap_string(cf->caps)); | ||
2049 | } | ||
2050 | } else { | ||
2051 | struct ceph_cap_snap *capsnap = | ||
2052 | container_of(cf, struct ceph_cap_snap, | ||
2053 | cap_flush); | ||
2054 | dout("kick_flushing_caps %p capsnap %p tid %llu %s\n", | ||
2055 | inode, capsnap, cf->tid, | ||
2056 | ceph_cap_string(capsnap->dirty)); | ||
2057 | |||
2058 | atomic_inc(&capsnap->nref); | ||
2059 | spin_unlock(&ci->i_ceph_lock); | ||
2060 | |||
2061 | ret = __send_flush_snap(inode, session, capsnap, cap->mseq, | ||
2062 | oldest_flush_tid); | ||
2063 | if (ret < 0) { | ||
2064 | pr_err("kick_flushing_caps: error sending " | ||
2065 | "cap flushsnap, ino (%llx.%llx) " | ||
2066 | "tid %llu follows %llu\n", | ||
2067 | ceph_vinop(inode), cf->tid, | ||
2068 | capsnap->follows); | ||
2069 | } | ||
2070 | |||
2071 | ceph_put_cap_snap(capsnap); | ||
2072 | } | ||
2069 | 2073 | ||
2070 | spin_lock(&ci->i_ceph_lock); | 2074 | spin_lock(&ci->i_ceph_lock); |
2071 | } | 2075 | } |
2072 | spin_unlock(&ci->i_ceph_lock); | ||
2073 | return delayed; | ||
2074 | } | 2076 | } |
2075 | 2077 | ||
2076 | void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc, | 2078 | void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc, |
@@ -2078,8 +2080,14 @@ void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc, | |||
2078 | { | 2080 | { |
2079 | struct ceph_inode_info *ci; | 2081 | struct ceph_inode_info *ci; |
2080 | struct ceph_cap *cap; | 2082 | struct ceph_cap *cap; |
2083 | u64 oldest_flush_tid; | ||
2081 | 2084 | ||
2082 | dout("early_kick_flushing_caps mds%d\n", session->s_mds); | 2085 | dout("early_kick_flushing_caps mds%d\n", session->s_mds); |
2086 | |||
2087 | spin_lock(&mdsc->cap_dirty_lock); | ||
2088 | oldest_flush_tid = __get_oldest_flush_tid(mdsc); | ||
2089 | spin_unlock(&mdsc->cap_dirty_lock); | ||
2090 | |||
2083 | list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) { | 2091 | list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) { |
2084 | spin_lock(&ci->i_ceph_lock); | 2092 | spin_lock(&ci->i_ceph_lock); |
2085 | cap = ci->i_auth_cap; | 2093 | cap = ci->i_auth_cap; |
@@ -2099,10 +2107,8 @@ void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc, | |||
2099 | */ | 2107 | */ |
2100 | if ((cap->issued & ci->i_flushing_caps) != | 2108 | if ((cap->issued & ci->i_flushing_caps) != |
2101 | ci->i_flushing_caps) { | 2109 | ci->i_flushing_caps) { |
2102 | spin_unlock(&ci->i_ceph_lock); | 2110 | __kick_flushing_caps(mdsc, session, ci, |
2103 | if (!__kick_flushing_caps(mdsc, session, ci)) | 2111 | oldest_flush_tid); |
2104 | continue; | ||
2105 | spin_lock(&ci->i_ceph_lock); | ||
2106 | } | 2112 | } |
2107 | 2113 | ||
2108 | spin_unlock(&ci->i_ceph_lock); | 2114 | spin_unlock(&ci->i_ceph_lock); |
@@ -2113,50 +2119,43 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, | |||
2113 | struct ceph_mds_session *session) | 2119 | struct ceph_mds_session *session) |
2114 | { | 2120 | { |
2115 | struct ceph_inode_info *ci; | 2121 | struct ceph_inode_info *ci; |
2116 | 2122 | u64 oldest_flush_tid; | |
2117 | kick_flushing_capsnaps(mdsc, session); | ||
2118 | 2123 | ||
2119 | dout("kick_flushing_caps mds%d\n", session->s_mds); | 2124 | dout("kick_flushing_caps mds%d\n", session->s_mds); |
2125 | |||
2126 | spin_lock(&mdsc->cap_dirty_lock); | ||
2127 | oldest_flush_tid = __get_oldest_flush_tid(mdsc); | ||
2128 | spin_unlock(&mdsc->cap_dirty_lock); | ||
2129 | |||
2120 | list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) { | 2130 | list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) { |
2121 | int delayed = __kick_flushing_caps(mdsc, session, ci); | 2131 | spin_lock(&ci->i_ceph_lock); |
2122 | if (delayed) { | 2132 | __kick_flushing_caps(mdsc, session, ci, oldest_flush_tid); |
2123 | spin_lock(&ci->i_ceph_lock); | 2133 | spin_unlock(&ci->i_ceph_lock); |
2124 | __cap_delay_requeue(mdsc, ci); | ||
2125 | spin_unlock(&ci->i_ceph_lock); | ||
2126 | } | ||
2127 | } | 2134 | } |
2128 | } | 2135 | } |
2129 | 2136 | ||
2130 | static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc, | 2137 | static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc, |
2131 | struct ceph_mds_session *session, | 2138 | struct ceph_mds_session *session, |
2132 | struct inode *inode) | 2139 | struct inode *inode) |
2140 | __releases(ci->i_ceph_lock) | ||
2133 | { | 2141 | { |
2134 | struct ceph_inode_info *ci = ceph_inode(inode); | 2142 | struct ceph_inode_info *ci = ceph_inode(inode); |
2135 | struct ceph_cap *cap; | 2143 | struct ceph_cap *cap; |
2136 | 2144 | ||
2137 | spin_lock(&ci->i_ceph_lock); | ||
2138 | cap = ci->i_auth_cap; | 2145 | cap = ci->i_auth_cap; |
2139 | dout("kick_flushing_inode_caps %p flushing %s\n", inode, | 2146 | dout("kick_flushing_inode_caps %p flushing %s\n", inode, |
2140 | ceph_cap_string(ci->i_flushing_caps)); | 2147 | ceph_cap_string(ci->i_flushing_caps)); |
2141 | 2148 | ||
2142 | __ceph_flush_snaps(ci, &session, 1); | 2149 | if (!list_empty(&ci->i_cap_flush_list)) { |
2143 | 2150 | u64 oldest_flush_tid; | |
2144 | if (ci->i_flushing_caps) { | ||
2145 | int delayed; | ||
2146 | |||
2147 | spin_lock(&mdsc->cap_dirty_lock); | 2151 | spin_lock(&mdsc->cap_dirty_lock); |
2148 | list_move_tail(&ci->i_flushing_item, | 2152 | list_move_tail(&ci->i_flushing_item, |
2149 | &cap->session->s_cap_flushing); | 2153 | &cap->session->s_cap_flushing); |
2154 | oldest_flush_tid = __get_oldest_flush_tid(mdsc); | ||
2150 | spin_unlock(&mdsc->cap_dirty_lock); | 2155 | spin_unlock(&mdsc->cap_dirty_lock); |
2151 | 2156 | ||
2157 | __kick_flushing_caps(mdsc, session, ci, oldest_flush_tid); | ||
2152 | spin_unlock(&ci->i_ceph_lock); | 2158 | spin_unlock(&ci->i_ceph_lock); |
2153 | |||
2154 | delayed = __kick_flushing_caps(mdsc, session, ci); | ||
2155 | if (delayed) { | ||
2156 | spin_lock(&ci->i_ceph_lock); | ||
2157 | __cap_delay_requeue(mdsc, ci); | ||
2158 | spin_unlock(&ci->i_ceph_lock); | ||
2159 | } | ||
2160 | } else { | 2159 | } else { |
2161 | spin_unlock(&ci->i_ceph_lock); | 2160 | spin_unlock(&ci->i_ceph_lock); |
2162 | } | 2161 | } |
@@ -2487,12 +2486,11 @@ static int ceph_try_drop_cap_snap(struct ceph_cap_snap *capsnap) | |||
2487 | { | 2486 | { |
2488 | if (!capsnap->need_flush && | 2487 | if (!capsnap->need_flush && |
2489 | !capsnap->writing && !capsnap->dirty_pages) { | 2488 | !capsnap->writing && !capsnap->dirty_pages) { |
2490 | |||
2491 | dout("dropping cap_snap %p follows %llu\n", | 2489 | dout("dropping cap_snap %p follows %llu\n", |
2492 | capsnap, capsnap->follows); | 2490 | capsnap, capsnap->follows); |
2491 | BUG_ON(capsnap->cap_flush.tid > 0); | ||
2493 | ceph_put_snap_context(capsnap->context); | 2492 | ceph_put_snap_context(capsnap->context); |
2494 | list_del(&capsnap->ci_item); | 2493 | list_del(&capsnap->ci_item); |
2495 | list_del(&capsnap->flushing_item); | ||
2496 | ceph_put_cap_snap(capsnap); | 2494 | ceph_put_cap_snap(capsnap); |
2497 | return 1; | 2495 | return 1; |
2498 | } | 2496 | } |
@@ -2891,13 +2889,13 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, | |||
2891 | fill_inline = true; | 2889 | fill_inline = true; |
2892 | } | 2890 | } |
2893 | 2891 | ||
2894 | spin_unlock(&ci->i_ceph_lock); | ||
2895 | |||
2896 | if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) { | 2892 | if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) { |
2897 | kick_flushing_inode_caps(mdsc, session, inode); | ||
2898 | up_read(&mdsc->snap_rwsem); | ||
2899 | if (newcaps & ~issued) | 2893 | if (newcaps & ~issued) |
2900 | wake = true; | 2894 | wake = true; |
2895 | kick_flushing_inode_caps(mdsc, session, inode); | ||
2896 | up_read(&mdsc->snap_rwsem); | ||
2897 | } else { | ||
2898 | spin_unlock(&ci->i_ceph_lock); | ||
2901 | } | 2899 | } |
2902 | 2900 | ||
2903 | if (fill_inline) | 2901 | if (fill_inline) |
@@ -2951,6 +2949,8 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, | |||
2951 | list_for_each_entry_safe(cf, tmp_cf, &ci->i_cap_flush_list, i_list) { | 2949 | list_for_each_entry_safe(cf, tmp_cf, &ci->i_cap_flush_list, i_list) { |
2952 | if (cf->tid == flush_tid) | 2950 | if (cf->tid == flush_tid) |
2953 | cleaned = cf->caps; | 2951 | cleaned = cf->caps; |
2952 | if (cf->caps == 0) /* capsnap */ | ||
2953 | continue; | ||
2954 | if (cf->tid <= flush_tid) { | 2954 | if (cf->tid <= flush_tid) { |
2955 | list_del(&cf->i_list); | 2955 | list_del(&cf->i_list); |
2956 | list_add_tail(&cf->i_list, &to_remove); | 2956 | list_add_tail(&cf->i_list, &to_remove); |
@@ -2985,13 +2985,16 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, | |||
2985 | } | 2985 | } |
2986 | 2986 | ||
2987 | if (ci->i_flushing_caps == 0) { | 2987 | if (ci->i_flushing_caps == 0) { |
2988 | list_del_init(&ci->i_flushing_item); | 2988 | if (list_empty(&ci->i_cap_flush_list)) { |
2989 | if (!list_empty(&session->s_cap_flushing)) | 2989 | list_del_init(&ci->i_flushing_item); |
2990 | dout(" mds%d still flushing cap on %p\n", | 2990 | if (!list_empty(&session->s_cap_flushing)) { |
2991 | session->s_mds, | 2991 | dout(" mds%d still flushing cap on %p\n", |
2992 | &list_entry(session->s_cap_flushing.next, | 2992 | session->s_mds, |
2993 | struct ceph_inode_info, | 2993 | &list_first_entry(&session->s_cap_flushing, |
2994 | i_flushing_item)->vfs_inode); | 2994 | struct ceph_inode_info, |
2995 | i_flushing_item)->vfs_inode); | ||
2996 | } | ||
2997 | } | ||
2995 | mdsc->num_cap_flushing--; | 2998 | mdsc->num_cap_flushing--; |
2996 | dout(" inode %p now !flushing\n", inode); | 2999 | dout(" inode %p now !flushing\n", inode); |
2997 | 3000 | ||
@@ -3039,7 +3042,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid, | |||
3039 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; | 3042 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
3040 | u64 follows = le64_to_cpu(m->snap_follows); | 3043 | u64 follows = le64_to_cpu(m->snap_follows); |
3041 | struct ceph_cap_snap *capsnap; | 3044 | struct ceph_cap_snap *capsnap; |
3042 | int drop = 0; | 3045 | int flushed = 0; |
3043 | 3046 | ||
3044 | dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n", | 3047 | dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n", |
3045 | inode, ci, session->s_mds, follows); | 3048 | inode, ci, session->s_mds, follows); |
@@ -3047,30 +3050,47 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid, | |||
3047 | spin_lock(&ci->i_ceph_lock); | 3050 | spin_lock(&ci->i_ceph_lock); |
3048 | list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { | 3051 | list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { |
3049 | if (capsnap->follows == follows) { | 3052 | if (capsnap->follows == follows) { |
3050 | if (capsnap->flush_tid != flush_tid) { | 3053 | if (capsnap->cap_flush.tid != flush_tid) { |
3051 | dout(" cap_snap %p follows %lld tid %lld !=" | 3054 | dout(" cap_snap %p follows %lld tid %lld !=" |
3052 | " %lld\n", capsnap, follows, | 3055 | " %lld\n", capsnap, follows, |
3053 | flush_tid, capsnap->flush_tid); | 3056 | flush_tid, capsnap->cap_flush.tid); |
3054 | break; | 3057 | break; |
3055 | } | 3058 | } |
3056 | WARN_ON(capsnap->dirty_pages || capsnap->writing); | 3059 | flushed = 1; |
3057 | dout(" removing %p cap_snap %p follows %lld\n", | ||
3058 | inode, capsnap, follows); | ||
3059 | ceph_put_snap_context(capsnap->context); | ||
3060 | list_del(&capsnap->ci_item); | ||
3061 | list_del(&capsnap->flushing_item); | ||
3062 | ceph_put_cap_snap(capsnap); | ||
3063 | wake_up_all(&mdsc->cap_flushing_wq); | ||
3064 | drop = 1; | ||
3065 | break; | 3060 | break; |
3066 | } else { | 3061 | } else { |
3067 | dout(" skipping cap_snap %p follows %lld\n", | 3062 | dout(" skipping cap_snap %p follows %lld\n", |
3068 | capsnap, capsnap->follows); | 3063 | capsnap, capsnap->follows); |
3069 | } | 3064 | } |
3070 | } | 3065 | } |
3066 | if (flushed) { | ||
3067 | u64 oldest_flush_tid; | ||
3068 | WARN_ON(capsnap->dirty_pages || capsnap->writing); | ||
3069 | dout(" removing %p cap_snap %p follows %lld\n", | ||
3070 | inode, capsnap, follows); | ||
3071 | list_del(&capsnap->ci_item); | ||
3072 | list_del(&capsnap->cap_flush.i_list); | ||
3073 | |||
3074 | spin_lock(&mdsc->cap_dirty_lock); | ||
3075 | |||
3076 | if (list_empty(&ci->i_cap_flush_list)) | ||
3077 | list_del_init(&ci->i_flushing_item); | ||
3078 | |||
3079 | list_del(&capsnap->cap_flush.g_list); | ||
3080 | |||
3081 | oldest_flush_tid = __get_oldest_flush_tid(mdsc); | ||
3082 | if (oldest_flush_tid == 0 || oldest_flush_tid > flush_tid) | ||
3083 | wake_up_all(&mdsc->cap_flushing_wq); | ||
3084 | |||
3085 | spin_unlock(&mdsc->cap_dirty_lock); | ||
3086 | wake_up_all(&ci->i_cap_wq); | ||
3087 | } | ||
3071 | spin_unlock(&ci->i_ceph_lock); | 3088 | spin_unlock(&ci->i_ceph_lock); |
3072 | if (drop) | 3089 | if (flushed) { |
3090 | ceph_put_snap_context(capsnap->context); | ||
3091 | ceph_put_cap_snap(capsnap); | ||
3073 | iput(inode); | 3092 | iput(inode); |
3093 | } | ||
3074 | } | 3094 | } |
3075 | 3095 | ||
3076 | /* | 3096 | /* |
@@ -3175,7 +3195,8 @@ retry: | |||
3175 | tcap->implemented |= issued; | 3195 | tcap->implemented |= issued; |
3176 | if (cap == ci->i_auth_cap) | 3196 | if (cap == ci->i_auth_cap) |
3177 | ci->i_auth_cap = tcap; | 3197 | ci->i_auth_cap = tcap; |
3178 | if (ci->i_flushing_caps && ci->i_auth_cap == tcap) { | 3198 | if (!list_empty(&ci->i_cap_flush_list) && |
3199 | ci->i_auth_cap == tcap) { | ||
3179 | spin_lock(&mdsc->cap_dirty_lock); | 3200 | spin_lock(&mdsc->cap_dirty_lock); |
3180 | list_move_tail(&ci->i_flushing_item, | 3201 | list_move_tail(&ci->i_flushing_item, |
3181 | &tcap->session->s_cap_flushing); | 3202 | &tcap->session->s_cap_flushing); |