aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYan, Zheng <zyan@redhat.com>2016-07-05 09:08:07 -0400
committerIlya Dryomov <idryomov@gmail.com>2016-07-27 21:00:44 -0400
commited9b430c9ba99e70e8ddd7e08429c4c2a620ba74 (patch)
tree01af2d3659aeb5635b9e5644319bed8f9aee7aac
parent7bc00fddb9de7f78f742bc24d95e15abde15c078 (diff)
ceph: cleanup ceph_flush_snaps()
This patch divides __ceph_flush_snaps() into two stages. In the first stage, __ceph_flush_snaps() assigns flush TIDs to the cap snaps and adds them to the cap flush lists. __ceph_flush_snaps() keeps holding i_ceph_lock during this stage, so the inode's auth cap cannot change. In the second stage, __ceph_flush_snaps() sends the flushsnap cap messages. i_ceph_lock is unlocked before sending each cap message. If the auth cap changes in the middle, __ceph_flush_snaps() just stops. This is OK because kick_flushing_inode_caps() will re-send the flushsnap cap messages to the inode's new auth MDS. Signed-off-by: Yan, Zheng <zyan@redhat.com>
-rw-r--r--fs/ceph/caps.c185
-rw-r--r--fs/ceph/snap.c4
-rw-r--r--fs/ceph/super.h4
3 files changed, 105 insertions, 88 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 39e471d0aa50..736e1c86bcf3 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1247,32 +1247,20 @@ static inline int __send_flush_snap(struct inode *inode,
1247 * 1247 *
1248 * Called under i_ceph_lock. Takes s_mutex as needed. 1248 * Called under i_ceph_lock. Takes s_mutex as needed.
1249 */ 1249 */
1250void __ceph_flush_snaps(struct ceph_inode_info *ci, 1250static void __ceph_flush_snaps(struct ceph_inode_info *ci,
1251 struct ceph_mds_session **psession) 1251 struct ceph_mds_session *session)
1252 __releases(ci->i_ceph_lock) 1252 __releases(ci->i_ceph_lock)
1253 __acquires(ci->i_ceph_lock) 1253 __acquires(ci->i_ceph_lock)
1254{ 1254{
1255 struct inode *inode = &ci->vfs_inode; 1255 struct inode *inode = &ci->vfs_inode;
1256 int mds; 1256 struct ceph_mds_client *mdsc = session->s_mdsc;
1257 struct ceph_cap_snap *capsnap; 1257 struct ceph_cap_snap *capsnap;
1258 u32 mseq; 1258 u64 oldest_flush_tid = 0;
1259 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; 1259 u64 first_tid = 1, last_tid = 0;
1260 struct ceph_mds_session *session = NULL; /* if session != NULL, we hold
1261 session->s_mutex */
1262 u64 oldest_flush_tid;
1263 u64 next_follows = 0; /* keep track of how far we've gotten through the
1264 i_cap_snaps list, and skip these entries next time
1265 around to avoid an infinite loop */
1266 1260
1267 if (psession) 1261 dout("__flush_snaps %p session %p\n", inode, session);
1268 session = *psession;
1269 1262
1270 dout("__flush_snaps %p\n", inode);
1271retry:
1272 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { 1263 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
1273 /* avoid an infiniute loop after retry */
1274 if (capsnap->follows < next_follows)
1275 continue;
1276 /* 1264 /*
1277 * we need to wait for sync writes to complete and for dirty 1265 * we need to wait for sync writes to complete and for dirty
1278 * pages to be written out. 1266 * pages to be written out.
@@ -1283,53 +1271,18 @@ retry:
1283 /* should be removed by ceph_try_drop_cap_snap() */ 1271 /* should be removed by ceph_try_drop_cap_snap() */
1284 BUG_ON(!capsnap->need_flush); 1272 BUG_ON(!capsnap->need_flush);
1285 1273
1286 /* pick mds, take s_mutex */
1287 if (ci->i_auth_cap == NULL) {
1288 dout("no auth cap (migrating?), doing nothing\n");
1289 goto out;
1290 }
1291
1292 /* only flush each capsnap once */ 1274 /* only flush each capsnap once */
1293 if (capsnap->cap_flush.tid > 0) { 1275 if (capsnap->cap_flush.tid > 0) {
1294 dout("already flushed %p, skipping\n", capsnap); 1276 dout(" already flushed %p, skipping\n", capsnap);
1295 continue; 1277 continue;
1296 } 1278 }
1297 1279
1298 mds = ci->i_auth_cap->session->s_mds;
1299 mseq = ci->i_auth_cap->mseq;
1300
1301 if (session && session->s_mds != mds) {
1302 dout("oops, wrong session %p mutex\n", session);
1303
1304 mutex_unlock(&session->s_mutex);
1305 ceph_put_mds_session(session);
1306 session = NULL;
1307 }
1308 if (!session) {
1309 spin_unlock(&ci->i_ceph_lock);
1310 mutex_lock(&mdsc->mutex);
1311 session = __ceph_lookup_mds_session(mdsc, mds);
1312 mutex_unlock(&mdsc->mutex);
1313 if (session) {
1314 dout("inverting session/ino locks on %p\n",
1315 session);
1316 mutex_lock(&session->s_mutex);
1317 }
1318 /*
1319 * if session == NULL, we raced against a cap
1320 * deletion or migration. retry, and we'll
1321 * get a better @mds value next time.
1322 */
1323 spin_lock(&ci->i_ceph_lock);
1324 goto retry;
1325 }
1326
1327 spin_lock(&mdsc->cap_dirty_lock); 1280 spin_lock(&mdsc->cap_dirty_lock);
1328 capsnap->cap_flush.tid = ++mdsc->last_cap_flush_tid; 1281 capsnap->cap_flush.tid = ++mdsc->last_cap_flush_tid;
1329 list_add_tail(&capsnap->cap_flush.g_list, 1282 list_add_tail(&capsnap->cap_flush.g_list,
1330 &mdsc->cap_flush_list); 1283 &mdsc->cap_flush_list);
1331 oldest_flush_tid = __get_oldest_flush_tid(mdsc); 1284 if (oldest_flush_tid == 0)
1332 1285 oldest_flush_tid = __get_oldest_flush_tid(mdsc);
1333 if (list_empty(&ci->i_flushing_item)) { 1286 if (list_empty(&ci->i_flushing_item)) {
1334 list_add_tail(&ci->i_flushing_item, 1287 list_add_tail(&ci->i_flushing_item,
1335 &session->s_cap_flushing); 1288 &session->s_cap_flushing);
@@ -1339,41 +1292,108 @@ retry:
1339 list_add_tail(&capsnap->cap_flush.i_list, 1292 list_add_tail(&capsnap->cap_flush.i_list,
1340 &ci->i_cap_flush_list); 1293 &ci->i_cap_flush_list);
1341 1294
1295 if (first_tid == 1)
1296 first_tid = capsnap->cap_flush.tid;
1297 last_tid = capsnap->cap_flush.tid;
1298 }
1299
1300 ci->i_ceph_flags &= ~CEPH_I_FLUSH_SNAPS;
1301
1302 while (first_tid <= last_tid) {
1303 struct ceph_cap *cap = ci->i_auth_cap;
1304 struct ceph_cap_flush *cf;
1305 int ret;
1306
1307 if (!(cap && cap->session == session)) {
1308 dout("__flush_snaps %p auth cap %p not mds%d, "
1309 "stop\n", inode, cap, session->s_mds);
1310 break;
1311 }
1312
1313 ret = -ENOENT;
1314 list_for_each_entry(cf, &ci->i_cap_flush_list, i_list) {
1315 if (cf->tid >= first_tid) {
1316 ret = 0;
1317 break;
1318 }
1319 }
1320 if (ret < 0)
1321 break;
1322
1323 first_tid = cf->tid + 1;
1324
1325 capsnap = container_of(cf, struct ceph_cap_snap, cap_flush);
1342 atomic_inc(&capsnap->nref); 1326 atomic_inc(&capsnap->nref);
1343 spin_unlock(&ci->i_ceph_lock); 1327 spin_unlock(&ci->i_ceph_lock);
1344 1328
1345 dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n", 1329 dout("__flush_snaps %p capsnap %p tid %llu %s\n",
1346 inode, capsnap, capsnap->follows, capsnap->cap_flush.tid); 1330 inode, capsnap, cf->tid, ceph_cap_string(capsnap->dirty));
1347 __send_flush_snap(inode, session, capsnap, mseq,
1348 oldest_flush_tid);
1349 1331
1350 next_follows = capsnap->follows + 1; 1332 ret = __send_flush_snap(inode, session, capsnap, cap->mseq,
1351 ceph_put_cap_snap(capsnap); 1333 oldest_flush_tid);
1334 if (ret < 0) {
1335 pr_err("__flush_snaps: error sending cap flushsnap, "
1336 "ino (%llx.%llx) tid %llu follows %llu\n",
1337 ceph_vinop(inode), cf->tid, capsnap->follows);
1338 }
1352 1339
1340 ceph_put_cap_snap(capsnap);
1353 spin_lock(&ci->i_ceph_lock); 1341 spin_lock(&ci->i_ceph_lock);
1354 goto retry;
1355 } 1342 }
1343}
1356 1344
1357 /* we flushed them all; remove this inode from the queue */ 1345void ceph_flush_snaps(struct ceph_inode_info *ci,
1358 spin_lock(&mdsc->snap_flush_lock); 1346 struct ceph_mds_session **psession)
1359 list_del_init(&ci->i_snap_flush_item); 1347{
1360 spin_unlock(&mdsc->snap_flush_lock); 1348 struct inode *inode = &ci->vfs_inode;
1349 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
1350 struct ceph_mds_session *session = *psession;
1351 int mds;
1352 dout("ceph_flush_snaps %p\n", inode);
1353retry:
1354 spin_lock(&ci->i_ceph_lock);
1355 if (!(ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)) {
1356 dout(" no capsnap needs flush, doing nothing\n");
1357 goto out;
1358 }
1359 if (!ci->i_auth_cap) {
1360 dout(" no auth cap (migrating?), doing nothing\n");
1361 goto out;
1362 }
1361 1363
1362out: 1364 mds = ci->i_auth_cap->session->s_mds;
1363 if (psession) 1365 if (session && session->s_mds != mds) {
1364 *psession = session; 1366 dout(" oops, wrong session %p mutex\n", session);
1365 else if (session) {
1366 mutex_unlock(&session->s_mutex); 1367 mutex_unlock(&session->s_mutex);
1367 ceph_put_mds_session(session); 1368 ceph_put_mds_session(session);
1369 session = NULL;
1370 }
1371 if (!session) {
1372 spin_unlock(&ci->i_ceph_lock);
1373 mutex_lock(&mdsc->mutex);
1374 session = __ceph_lookup_mds_session(mdsc, mds);
1375 mutex_unlock(&mdsc->mutex);
1376 if (session) {
1377 dout(" inverting session/ino locks on %p\n", session);
1378 mutex_lock(&session->s_mutex);
1379 }
1380 goto retry;
1368 } 1381 }
1369}
1370 1382
1371static void ceph_flush_snaps(struct ceph_inode_info *ci) 1383 __ceph_flush_snaps(ci, session);
1372{ 1384out:
1373 spin_lock(&ci->i_ceph_lock);
1374 __ceph_flush_snaps(ci, NULL);
1375 ci->i_ceph_flags &= ~CEPH_I_FLUSH_SNAPS;
1376 spin_unlock(&ci->i_ceph_lock); 1385 spin_unlock(&ci->i_ceph_lock);
1386
1387 if (psession) {
1388 *psession = session;
1389 } else {
1390 mutex_unlock(&session->s_mutex);
1391 ceph_put_mds_session(session);
1392 }
1393 /* we flushed them all; remove this inode from the queue */
1394 spin_lock(&mdsc->snap_flush_lock);
1395 list_del_init(&ci->i_snap_flush_item);
1396 spin_unlock(&mdsc->snap_flush_lock);
1377} 1397}
1378 1398
1379/* 1399/*
@@ -1768,10 +1788,9 @@ ack:
1768 oldest_flush_tid); 1788 oldest_flush_tid);
1769 ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH; 1789 ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH;
1770 } 1790 }
1771 if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS) { 1791 if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)
1772 __ceph_flush_snaps(ci, &session); 1792 __ceph_flush_snaps(ci, session);
1773 ci->i_ceph_flags &= ~CEPH_I_FLUSH_SNAPS; 1793
1774 }
1775 goto retry_locked; 1794 goto retry_locked;
1776 } 1795 }
1777 1796
@@ -2610,7 +2629,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
2610 if (last && !flushsnaps) 2629 if (last && !flushsnaps)
2611 ceph_check_caps(ci, 0, NULL); 2630 ceph_check_caps(ci, 0, NULL);
2612 else if (flushsnaps) 2631 else if (flushsnaps)
2613 ceph_flush_snaps(ci); 2632 ceph_flush_snaps(ci, NULL);
2614 if (wake) 2633 if (wake)
2615 wake_up_all(&ci->i_cap_wq); 2634 wake_up_all(&ci->i_cap_wq);
2616 while (put-- > 0) 2635 while (put-- > 0)
@@ -2691,7 +2710,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2691 if (last) { 2710 if (last) {
2692 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); 2711 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
2693 } else if (flush_snaps) { 2712 } else if (flush_snaps) {
2694 ceph_flush_snaps(ci); 2713 ceph_flush_snaps(ci, NULL);
2695 } 2714 }
2696 if (complete_capsnap) 2715 if (complete_capsnap)
2697 wake_up_all(&ci->i_cap_wq); 2716 wake_up_all(&ci->i_cap_wq);
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index c3b03ae1976c..9ff5219d849e 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -799,9 +799,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
799 inode = &ci->vfs_inode; 799 inode = &ci->vfs_inode;
800 ihold(inode); 800 ihold(inode);
801 spin_unlock(&mdsc->snap_flush_lock); 801 spin_unlock(&mdsc->snap_flush_lock);
802 spin_lock(&ci->i_ceph_lock); 802 ceph_flush_snaps(ci, &session);
803 __ceph_flush_snaps(ci, &session);
804 spin_unlock(&ci->i_ceph_lock);
805 iput(inode); 803 iput(inode);
806 spin_lock(&mdsc->snap_flush_lock); 804 spin_lock(&mdsc->snap_flush_lock);
807 } 805 }
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 63fdb57606fe..b097d474f888 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -890,8 +890,8 @@ extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
890extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had); 890extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
891extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, 891extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
892 struct ceph_snap_context *snapc); 892 struct ceph_snap_context *snapc);
893extern void __ceph_flush_snaps(struct ceph_inode_info *ci, 893extern void ceph_flush_snaps(struct ceph_inode_info *ci,
894 struct ceph_mds_session **psession); 894 struct ceph_mds_session **psession);
895extern void ceph_check_caps(struct ceph_inode_info *ci, int flags, 895extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
896 struct ceph_mds_session *session); 896 struct ceph_mds_session *session);
897extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc); 897extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);