diff options
author | Yan, Zheng <zyan@redhat.com> | 2016-07-05 09:08:07 -0400 |
---|---|---|
committer | Ilya Dryomov <idryomov@gmail.com> | 2016-07-27 21:00:44 -0400 |
commit | ed9b430c9ba99e70e8ddd7e08429c4c2a620ba74 (patch) | |
tree | 01af2d3659aeb5635b9e5644319bed8f9aee7aac | |
parent | 7bc00fddb9de7f78f742bc24d95e15abde15c078 (diff) |
ceph: cleanup ceph_flush_snaps()
This patch divides __ceph_flush_snaps() into two stages. In the first
stage, __ceph_flush_snaps() assigns snapcaps flush TIDs and adds them
to cap flush lists. __ceph_flush_snaps() keeps holding the
i_ceph_lock in this stage, so the inode's auth cap cannot change. In
the second stage, __ceph_flush_snaps() sends flushsnap cap messages.
i_ceph_lock is unlocked before sending each cap message. If the auth cap
changes in the middle, __ceph_flush_snaps() just stops. This is OK
because kick_flushing_inode_caps() will re-send flushsnap cap messages
to the inode's new auth MDS.
Signed-off-by: Yan, Zheng <zyan@redhat.com>
-rw-r--r-- | fs/ceph/caps.c | 185 | ||||
-rw-r--r-- | fs/ceph/snap.c | 4 | ||||
-rw-r--r-- | fs/ceph/super.h | 4 |
3 files changed, 105 insertions, 88 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 39e471d0aa50..736e1c86bcf3 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -1247,32 +1247,20 @@ static inline int __send_flush_snap(struct inode *inode, | |||
1247 | * | 1247 | * |
1248 | * Called under i_ceph_lock. Takes s_mutex as needed. | 1248 | * Called under i_ceph_lock. Takes s_mutex as needed. |
1249 | */ | 1249 | */ |
1250 | void __ceph_flush_snaps(struct ceph_inode_info *ci, | 1250 | static void __ceph_flush_snaps(struct ceph_inode_info *ci, |
1251 | struct ceph_mds_session **psession) | 1251 | struct ceph_mds_session *session) |
1252 | __releases(ci->i_ceph_lock) | 1252 | __releases(ci->i_ceph_lock) |
1253 | __acquires(ci->i_ceph_lock) | 1253 | __acquires(ci->i_ceph_lock) |
1254 | { | 1254 | { |
1255 | struct inode *inode = &ci->vfs_inode; | 1255 | struct inode *inode = &ci->vfs_inode; |
1256 | int mds; | 1256 | struct ceph_mds_client *mdsc = session->s_mdsc; |
1257 | struct ceph_cap_snap *capsnap; | 1257 | struct ceph_cap_snap *capsnap; |
1258 | u32 mseq; | 1258 | u64 oldest_flush_tid = 0; |
1259 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; | 1259 | u64 first_tid = 1, last_tid = 0; |
1260 | struct ceph_mds_session *session = NULL; /* if session != NULL, we hold | ||
1261 | session->s_mutex */ | ||
1262 | u64 oldest_flush_tid; | ||
1263 | u64 next_follows = 0; /* keep track of how far we've gotten through the | ||
1264 | i_cap_snaps list, and skip these entries next time | ||
1265 | around to avoid an infinite loop */ | ||
1266 | 1260 | ||
1267 | if (psession) | 1261 | dout("__flush_snaps %p session %p\n", inode, session); |
1268 | session = *psession; | ||
1269 | 1262 | ||
1270 | dout("__flush_snaps %p\n", inode); | ||
1271 | retry: | ||
1272 | list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { | 1263 | list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { |
1273 | /* avoid an infiniute loop after retry */ | ||
1274 | if (capsnap->follows < next_follows) | ||
1275 | continue; | ||
1276 | /* | 1264 | /* |
1277 | * we need to wait for sync writes to complete and for dirty | 1265 | * we need to wait for sync writes to complete and for dirty |
1278 | * pages to be written out. | 1266 | * pages to be written out. |
@@ -1283,53 +1271,18 @@ retry: | |||
1283 | /* should be removed by ceph_try_drop_cap_snap() */ | 1271 | /* should be removed by ceph_try_drop_cap_snap() */ |
1284 | BUG_ON(!capsnap->need_flush); | 1272 | BUG_ON(!capsnap->need_flush); |
1285 | 1273 | ||
1286 | /* pick mds, take s_mutex */ | ||
1287 | if (ci->i_auth_cap == NULL) { | ||
1288 | dout("no auth cap (migrating?), doing nothing\n"); | ||
1289 | goto out; | ||
1290 | } | ||
1291 | |||
1292 | /* only flush each capsnap once */ | 1274 | /* only flush each capsnap once */ |
1293 | if (capsnap->cap_flush.tid > 0) { | 1275 | if (capsnap->cap_flush.tid > 0) { |
1294 | dout("already flushed %p, skipping\n", capsnap); | 1276 | dout(" already flushed %p, skipping\n", capsnap); |
1295 | continue; | 1277 | continue; |
1296 | } | 1278 | } |
1297 | 1279 | ||
1298 | mds = ci->i_auth_cap->session->s_mds; | ||
1299 | mseq = ci->i_auth_cap->mseq; | ||
1300 | |||
1301 | if (session && session->s_mds != mds) { | ||
1302 | dout("oops, wrong session %p mutex\n", session); | ||
1303 | |||
1304 | mutex_unlock(&session->s_mutex); | ||
1305 | ceph_put_mds_session(session); | ||
1306 | session = NULL; | ||
1307 | } | ||
1308 | if (!session) { | ||
1309 | spin_unlock(&ci->i_ceph_lock); | ||
1310 | mutex_lock(&mdsc->mutex); | ||
1311 | session = __ceph_lookup_mds_session(mdsc, mds); | ||
1312 | mutex_unlock(&mdsc->mutex); | ||
1313 | if (session) { | ||
1314 | dout("inverting session/ino locks on %p\n", | ||
1315 | session); | ||
1316 | mutex_lock(&session->s_mutex); | ||
1317 | } | ||
1318 | /* | ||
1319 | * if session == NULL, we raced against a cap | ||
1320 | * deletion or migration. retry, and we'll | ||
1321 | * get a better @mds value next time. | ||
1322 | */ | ||
1323 | spin_lock(&ci->i_ceph_lock); | ||
1324 | goto retry; | ||
1325 | } | ||
1326 | |||
1327 | spin_lock(&mdsc->cap_dirty_lock); | 1280 | spin_lock(&mdsc->cap_dirty_lock); |
1328 | capsnap->cap_flush.tid = ++mdsc->last_cap_flush_tid; | 1281 | capsnap->cap_flush.tid = ++mdsc->last_cap_flush_tid; |
1329 | list_add_tail(&capsnap->cap_flush.g_list, | 1282 | list_add_tail(&capsnap->cap_flush.g_list, |
1330 | &mdsc->cap_flush_list); | 1283 | &mdsc->cap_flush_list); |
1331 | oldest_flush_tid = __get_oldest_flush_tid(mdsc); | 1284 | if (oldest_flush_tid == 0) |
1332 | 1285 | oldest_flush_tid = __get_oldest_flush_tid(mdsc); | |
1333 | if (list_empty(&ci->i_flushing_item)) { | 1286 | if (list_empty(&ci->i_flushing_item)) { |
1334 | list_add_tail(&ci->i_flushing_item, | 1287 | list_add_tail(&ci->i_flushing_item, |
1335 | &session->s_cap_flushing); | 1288 | &session->s_cap_flushing); |
@@ -1339,41 +1292,108 @@ retry: | |||
1339 | list_add_tail(&capsnap->cap_flush.i_list, | 1292 | list_add_tail(&capsnap->cap_flush.i_list, |
1340 | &ci->i_cap_flush_list); | 1293 | &ci->i_cap_flush_list); |
1341 | 1294 | ||
1295 | if (first_tid == 1) | ||
1296 | first_tid = capsnap->cap_flush.tid; | ||
1297 | last_tid = capsnap->cap_flush.tid; | ||
1298 | } | ||
1299 | |||
1300 | ci->i_ceph_flags &= ~CEPH_I_FLUSH_SNAPS; | ||
1301 | |||
1302 | while (first_tid <= last_tid) { | ||
1303 | struct ceph_cap *cap = ci->i_auth_cap; | ||
1304 | struct ceph_cap_flush *cf; | ||
1305 | int ret; | ||
1306 | |||
1307 | if (!(cap && cap->session == session)) { | ||
1308 | dout("__flush_snaps %p auth cap %p not mds%d, " | ||
1309 | "stop\n", inode, cap, session->s_mds); | ||
1310 | break; | ||
1311 | } | ||
1312 | |||
1313 | ret = -ENOENT; | ||
1314 | list_for_each_entry(cf, &ci->i_cap_flush_list, i_list) { | ||
1315 | if (cf->tid >= first_tid) { | ||
1316 | ret = 0; | ||
1317 | break; | ||
1318 | } | ||
1319 | } | ||
1320 | if (ret < 0) | ||
1321 | break; | ||
1322 | |||
1323 | first_tid = cf->tid + 1; | ||
1324 | |||
1325 | capsnap = container_of(cf, struct ceph_cap_snap, cap_flush); | ||
1342 | atomic_inc(&capsnap->nref); | 1326 | atomic_inc(&capsnap->nref); |
1343 | spin_unlock(&ci->i_ceph_lock); | 1327 | spin_unlock(&ci->i_ceph_lock); |
1344 | 1328 | ||
1345 | dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n", | 1329 | dout("__flush_snaps %p capsnap %p tid %llu %s\n", |
1346 | inode, capsnap, capsnap->follows, capsnap->cap_flush.tid); | 1330 | inode, capsnap, cf->tid, ceph_cap_string(capsnap->dirty)); |
1347 | __send_flush_snap(inode, session, capsnap, mseq, | ||
1348 | oldest_flush_tid); | ||
1349 | 1331 | ||
1350 | next_follows = capsnap->follows + 1; | 1332 | ret = __send_flush_snap(inode, session, capsnap, cap->mseq, |
1351 | ceph_put_cap_snap(capsnap); | 1333 | oldest_flush_tid); |
1334 | if (ret < 0) { | ||
1335 | pr_err("__flush_snaps: error sending cap flushsnap, " | ||
1336 | "ino (%llx.%llx) tid %llu follows %llu\n", | ||
1337 | ceph_vinop(inode), cf->tid, capsnap->follows); | ||
1338 | } | ||
1352 | 1339 | ||
1340 | ceph_put_cap_snap(capsnap); | ||
1353 | spin_lock(&ci->i_ceph_lock); | 1341 | spin_lock(&ci->i_ceph_lock); |
1354 | goto retry; | ||
1355 | } | 1342 | } |
1343 | } | ||
1356 | 1344 | ||
1357 | /* we flushed them all; remove this inode from the queue */ | 1345 | void ceph_flush_snaps(struct ceph_inode_info *ci, |
1358 | spin_lock(&mdsc->snap_flush_lock); | 1346 | struct ceph_mds_session **psession) |
1359 | list_del_init(&ci->i_snap_flush_item); | 1347 | { |
1360 | spin_unlock(&mdsc->snap_flush_lock); | 1348 | struct inode *inode = &ci->vfs_inode; |
1349 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; | ||
1350 | struct ceph_mds_session *session = *psession; | ||
1351 | int mds; | ||
1352 | dout("ceph_flush_snaps %p\n", inode); | ||
1353 | retry: | ||
1354 | spin_lock(&ci->i_ceph_lock); | ||
1355 | if (!(ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)) { | ||
1356 | dout(" no capsnap needs flush, doing nothing\n"); | ||
1357 | goto out; | ||
1358 | } | ||
1359 | if (!ci->i_auth_cap) { | ||
1360 | dout(" no auth cap (migrating?), doing nothing\n"); | ||
1361 | goto out; | ||
1362 | } | ||
1361 | 1363 | ||
1362 | out: | 1364 | mds = ci->i_auth_cap->session->s_mds; |
1363 | if (psession) | 1365 | if (session && session->s_mds != mds) { |
1364 | *psession = session; | 1366 | dout(" oops, wrong session %p mutex\n", session); |
1365 | else if (session) { | ||
1366 | mutex_unlock(&session->s_mutex); | 1367 | mutex_unlock(&session->s_mutex); |
1367 | ceph_put_mds_session(session); | 1368 | ceph_put_mds_session(session); |
1369 | session = NULL; | ||
1370 | } | ||
1371 | if (!session) { | ||
1372 | spin_unlock(&ci->i_ceph_lock); | ||
1373 | mutex_lock(&mdsc->mutex); | ||
1374 | session = __ceph_lookup_mds_session(mdsc, mds); | ||
1375 | mutex_unlock(&mdsc->mutex); | ||
1376 | if (session) { | ||
1377 | dout(" inverting session/ino locks on %p\n", session); | ||
1378 | mutex_lock(&session->s_mutex); | ||
1379 | } | ||
1380 | goto retry; | ||
1368 | } | 1381 | } |
1369 | } | ||
1370 | 1382 | ||
1371 | static void ceph_flush_snaps(struct ceph_inode_info *ci) | 1383 | __ceph_flush_snaps(ci, session); |
1372 | { | 1384 | out: |
1373 | spin_lock(&ci->i_ceph_lock); | ||
1374 | __ceph_flush_snaps(ci, NULL); | ||
1375 | ci->i_ceph_flags &= ~CEPH_I_FLUSH_SNAPS; | ||
1376 | spin_unlock(&ci->i_ceph_lock); | 1385 | spin_unlock(&ci->i_ceph_lock); |
1386 | |||
1387 | if (psession) { | ||
1388 | *psession = session; | ||
1389 | } else { | ||
1390 | mutex_unlock(&session->s_mutex); | ||
1391 | ceph_put_mds_session(session); | ||
1392 | } | ||
1393 | /* we flushed them all; remove this inode from the queue */ | ||
1394 | spin_lock(&mdsc->snap_flush_lock); | ||
1395 | list_del_init(&ci->i_snap_flush_item); | ||
1396 | spin_unlock(&mdsc->snap_flush_lock); | ||
1377 | } | 1397 | } |
1378 | 1398 | ||
1379 | /* | 1399 | /* |
@@ -1768,10 +1788,9 @@ ack: | |||
1768 | oldest_flush_tid); | 1788 | oldest_flush_tid); |
1769 | ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH; | 1789 | ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH; |
1770 | } | 1790 | } |
1771 | if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS) { | 1791 | if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS) |
1772 | __ceph_flush_snaps(ci, &session); | 1792 | __ceph_flush_snaps(ci, session); |
1773 | ci->i_ceph_flags &= ~CEPH_I_FLUSH_SNAPS; | 1793 | |
1774 | } | ||
1775 | goto retry_locked; | 1794 | goto retry_locked; |
1776 | } | 1795 | } |
1777 | 1796 | ||
@@ -2610,7 +2629,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had) | |||
2610 | if (last && !flushsnaps) | 2629 | if (last && !flushsnaps) |
2611 | ceph_check_caps(ci, 0, NULL); | 2630 | ceph_check_caps(ci, 0, NULL); |
2612 | else if (flushsnaps) | 2631 | else if (flushsnaps) |
2613 | ceph_flush_snaps(ci); | 2632 | ceph_flush_snaps(ci, NULL); |
2614 | if (wake) | 2633 | if (wake) |
2615 | wake_up_all(&ci->i_cap_wq); | 2634 | wake_up_all(&ci->i_cap_wq); |
2616 | while (put-- > 0) | 2635 | while (put-- > 0) |
@@ -2691,7 +2710,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, | |||
2691 | if (last) { | 2710 | if (last) { |
2692 | ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); | 2711 | ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); |
2693 | } else if (flush_snaps) { | 2712 | } else if (flush_snaps) { |
2694 | ceph_flush_snaps(ci); | 2713 | ceph_flush_snaps(ci, NULL); |
2695 | } | 2714 | } |
2696 | if (complete_capsnap) | 2715 | if (complete_capsnap) |
2697 | wake_up_all(&ci->i_cap_wq); | 2716 | wake_up_all(&ci->i_cap_wq); |
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index c3b03ae1976c..9ff5219d849e 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c | |||
@@ -799,9 +799,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc) | |||
799 | inode = &ci->vfs_inode; | 799 | inode = &ci->vfs_inode; |
800 | ihold(inode); | 800 | ihold(inode); |
801 | spin_unlock(&mdsc->snap_flush_lock); | 801 | spin_unlock(&mdsc->snap_flush_lock); |
802 | spin_lock(&ci->i_ceph_lock); | 802 | ceph_flush_snaps(ci, &session); |
803 | __ceph_flush_snaps(ci, &session); | ||
804 | spin_unlock(&ci->i_ceph_lock); | ||
805 | iput(inode); | 803 | iput(inode); |
806 | spin_lock(&mdsc->snap_flush_lock); | 804 | spin_lock(&mdsc->snap_flush_lock); |
807 | } | 805 | } |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 63fdb57606fe..b097d474f888 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -890,8 +890,8 @@ extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps); | |||
890 | extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had); | 890 | extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had); |
891 | extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, | 891 | extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, |
892 | struct ceph_snap_context *snapc); | 892 | struct ceph_snap_context *snapc); |
893 | extern void __ceph_flush_snaps(struct ceph_inode_info *ci, | 893 | extern void ceph_flush_snaps(struct ceph_inode_info *ci, |
894 | struct ceph_mds_session **psession); | 894 | struct ceph_mds_session **psession); |
895 | extern void ceph_check_caps(struct ceph_inode_info *ci, int flags, | 895 | extern void ceph_check_caps(struct ceph_inode_info *ci, int flags, |
896 | struct ceph_mds_session *session); | 896 | struct ceph_mds_session *session); |
897 | extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc); | 897 | extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc); |