summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Yan, Zheng <zyan@redhat.com> 2015-05-05 09:22:13 -0400
committer: Ilya Dryomov <idryomov@gmail.com> 2015-06-25 04:49:29 -0400
commit: affbc19a68f9966ad65a773db405f78e2bafc07b (patch)
tree: 63c34c40700e8b1fe1a73f1df244f3143b7aa99f
parent: 622f3e250f498976ad4cbae6f2be5cb359ded4f5 (diff)
ceph: make sure syncfs flushes all cap snaps
Signed-off-by: Yan, Zheng <zyan@redhat.com>
-rw-r--r-- fs/ceph/caps.c | 18
-rw-r--r-- fs/ceph/mds_client.c | 86
-rw-r--r-- fs/ceph/mds_client.h | 1
-rw-r--r-- fs/ceph/snap.c | 2
4 files changed, 76 insertions, 31 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 900c05fd77d8..bbd969e16a01 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1259,14 +1259,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1259 * asynchronously back to the MDS once sync writes complete and dirty 1259 * asynchronously back to the MDS once sync writes complete and dirty
1260 * data is written out. 1260 * data is written out.
1261 * 1261 *
1262 * Unless @again is true, skip cap_snaps that were already sent to 1262 * Unless @kick is true, skip cap_snaps that were already sent to
1263 * the MDS (i.e., during this session). 1263 * the MDS (i.e., during this session).
1264 * 1264 *
1265 * Called under i_ceph_lock. Takes s_mutex as needed. 1265 * Called under i_ceph_lock. Takes s_mutex as needed.
1266 */ 1266 */
1267void __ceph_flush_snaps(struct ceph_inode_info *ci, 1267void __ceph_flush_snaps(struct ceph_inode_info *ci,
1268 struct ceph_mds_session **psession, 1268 struct ceph_mds_session **psession,
1269 int again) 1269 int kick)
1270 __releases(ci->i_ceph_lock) 1270 __releases(ci->i_ceph_lock)
1271 __acquires(ci->i_ceph_lock) 1271 __acquires(ci->i_ceph_lock)
1272{ 1272{
@@ -1307,7 +1307,7 @@ retry:
1307 } 1307 }
1308 1308
1309 /* only flush each capsnap once */ 1309 /* only flush each capsnap once */
1310 if (!again && !list_empty(&capsnap->flushing_item)) { 1310 if (!kick && !list_empty(&capsnap->flushing_item)) {
1311 dout("already flushed %p, skipping\n", capsnap); 1311 dout("already flushed %p, skipping\n", capsnap);
1312 continue; 1312 continue;
1313 } 1313 }
@@ -1317,6 +1317,9 @@ retry:
1317 1317
1318 if (session && session->s_mds != mds) { 1318 if (session && session->s_mds != mds) {
1319 dout("oops, wrong session %p mutex\n", session); 1319 dout("oops, wrong session %p mutex\n", session);
1320 if (kick)
1321 goto out;
1322
1320 mutex_unlock(&session->s_mutex); 1323 mutex_unlock(&session->s_mutex);
1321 ceph_put_mds_session(session); 1324 ceph_put_mds_session(session);
1322 session = NULL; 1325 session = NULL;
@@ -1342,10 +1345,9 @@ retry:
1342 1345
1343 capsnap->flush_tid = ++ci->i_cap_flush_last_tid; 1346 capsnap->flush_tid = ++ci->i_cap_flush_last_tid;
1344 atomic_inc(&capsnap->nref); 1347 atomic_inc(&capsnap->nref);
1345 if (!list_empty(&capsnap->flushing_item)) 1348 if (list_empty(&capsnap->flushing_item))
1346 list_del_init(&capsnap->flushing_item); 1349 list_add_tail(&capsnap->flushing_item,
1347 list_add_tail(&capsnap->flushing_item, 1350 &session->s_cap_snaps_flushing);
1348 &session->s_cap_snaps_flushing);
1349 spin_unlock(&ci->i_ceph_lock); 1351 spin_unlock(&ci->i_ceph_lock);
1350 1352
1351 dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n", 1353 dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
@@ -2876,6 +2878,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
2876 struct ceph_mds_session *session) 2878 struct ceph_mds_session *session)
2877{ 2879{
2878 struct ceph_inode_info *ci = ceph_inode(inode); 2880 struct ceph_inode_info *ci = ceph_inode(inode);
2881 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
2879 u64 follows = le64_to_cpu(m->snap_follows); 2882 u64 follows = le64_to_cpu(m->snap_follows);
2880 struct ceph_cap_snap *capsnap; 2883 struct ceph_cap_snap *capsnap;
2881 int drop = 0; 2884 int drop = 0;
@@ -2899,6 +2902,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
2899 list_del(&capsnap->ci_item); 2902 list_del(&capsnap->ci_item);
2900 list_del(&capsnap->flushing_item); 2903 list_del(&capsnap->flushing_item);
2901 ceph_put_cap_snap(capsnap); 2904 ceph_put_cap_snap(capsnap);
2905 wake_up_all(&mdsc->cap_flushing_wq);
2902 drop = 1; 2906 drop = 1;
2903 break; 2907 break;
2904 } else { 2908 } else {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 88010f9a254d..2bb9264b9225 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1488,17 +1488,22 @@ out_unlocked:
1488 return err; 1488 return err;
1489} 1489}
1490 1490
1491static int check_cap_flush(struct inode *inode, u64 want_flush_seq) 1491static int check_cap_flush(struct ceph_inode_info *ci,
1492 u64 want_flush_seq, u64 want_snap_seq)
1492{ 1493{
1493 struct ceph_inode_info *ci = ceph_inode(inode); 1494 int ret1 = 1, ret2 = 1;
1494 int ret;
1495 spin_lock(&ci->i_ceph_lock); 1495 spin_lock(&ci->i_ceph_lock);
1496 if (ci->i_flushing_caps) 1496 if (want_flush_seq > 0 && ci->i_flushing_caps)
1497 ret = ci->i_cap_flush_seq >= want_flush_seq; 1497 ret1 = ci->i_cap_flush_seq >= want_flush_seq;
1498 else 1498
1499 ret = 1; 1499 if (want_snap_seq > 0 && !list_empty(&ci->i_cap_snaps)) {
1500 struct ceph_cap_snap *capsnap =
1501 list_first_entry(&ci->i_cap_snaps,
1502 struct ceph_cap_snap, ci_item);
1503 ret2 = capsnap->follows >= want_snap_seq;
1504 }
1500 spin_unlock(&ci->i_ceph_lock); 1505 spin_unlock(&ci->i_ceph_lock);
1501 return ret; 1506 return ret1 && ret2;
1502} 1507}
1503 1508
1504/* 1509/*
@@ -1506,45 +1511,72 @@ static int check_cap_flush(struct inode *inode, u64 want_flush_seq)
1506 * 1511 *
1507 * returns true if we've flushed through want_flush_seq 1512 * returns true if we've flushed through want_flush_seq
1508 */ 1513 */
1509static void wait_caps_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq) 1514static void wait_caps_flush(struct ceph_mds_client *mdsc,
1515 u64 want_flush_seq, u64 want_snap_seq)
1510{ 1516{
1511 int mds; 1517 int mds;
1512 1518
1513 dout("check_cap_flush want %lld\n", want_flush_seq); 1519 dout("check_cap_flush want %lld\n", want_flush_seq);
1514 mutex_lock(&mdsc->mutex); 1520 mutex_lock(&mdsc->mutex);
1515 for (mds = 0; mds < mdsc->max_sessions; mds++) { 1521 for (mds = 0; mds < mdsc->max_sessions; ) {
1516 struct ceph_mds_session *session = mdsc->sessions[mds]; 1522 struct ceph_mds_session *session = mdsc->sessions[mds];
1517 struct inode *inode = NULL; 1523 struct inode *inode1 = NULL, *inode2 = NULL;
1518 1524
1519 if (!session) 1525 if (!session) {
1526 mds++;
1520 continue; 1527 continue;
1528 }
1521 get_session(session); 1529 get_session(session);
1522 mutex_unlock(&mdsc->mutex); 1530 mutex_unlock(&mdsc->mutex);
1523 1531
1524 mutex_lock(&session->s_mutex); 1532 mutex_lock(&session->s_mutex);
1525 if (!list_empty(&session->s_cap_flushing)) { 1533 if (!list_empty(&session->s_cap_flushing)) {
1526 struct ceph_inode_info *ci = 1534 struct ceph_inode_info *ci =
1527 list_entry(session->s_cap_flushing.next, 1535 list_first_entry(&session->s_cap_flushing,
1528 struct ceph_inode_info, 1536 struct ceph_inode_info,
1529 i_flushing_item); 1537 i_flushing_item);
1530 1538
1531 if (!check_cap_flush(&ci->vfs_inode, want_flush_seq)) { 1539 if (!check_cap_flush(ci, want_flush_seq, 0)) {
1532 dout("check_cap_flush still flushing %p " 1540 dout("check_cap_flush still flushing %p "
1533 "seq %lld <= %lld to mds%d\n", 1541 "seq %lld <= %lld to mds%d\n",
1534 &ci->vfs_inode, ci->i_cap_flush_seq, 1542 &ci->vfs_inode, ci->i_cap_flush_seq,
1535 want_flush_seq, session->s_mds); 1543 want_flush_seq, mds);
1536 inode = igrab(&ci->vfs_inode); 1544 inode1 = igrab(&ci->vfs_inode);
1545 }
1546 }
1547 if (!list_empty(&session->s_cap_snaps_flushing)) {
1548 struct ceph_cap_snap *capsnap =
1549 list_first_entry(&session->s_cap_snaps_flushing,
1550 struct ceph_cap_snap,
1551 flushing_item);
1552 struct ceph_inode_info *ci = capsnap->ci;
1553 if (!check_cap_flush(ci, 0, want_snap_seq)) {
1554 dout("check_cap_flush still flushing snap %p "
1555 "follows %lld <= %lld to mds%d\n",
1556 &ci->vfs_inode, capsnap->follows,
1557 want_snap_seq, mds);
1558 inode2 = igrab(&ci->vfs_inode);
1537 } 1559 }
1538 } 1560 }
1539 mutex_unlock(&session->s_mutex); 1561 mutex_unlock(&session->s_mutex);
1540 ceph_put_mds_session(session); 1562 ceph_put_mds_session(session);
1541 1563
1542 if (inode) { 1564 if (inode1) {
1543 wait_event(mdsc->cap_flushing_wq, 1565 wait_event(mdsc->cap_flushing_wq,
1544 check_cap_flush(inode, want_flush_seq)); 1566 check_cap_flush(ceph_inode(inode1),
1545 iput(inode); 1567 want_flush_seq, 0));
1568 iput(inode1);
1569 }
1570 if (inode2) {
1571 wait_event(mdsc->cap_flushing_wq,
1572 check_cap_flush(ceph_inode(inode2),
1573 0, want_snap_seq));
1574 iput(inode2);
1546 } 1575 }
1547 1576
1577 if (!inode1 && !inode2)
1578 mds++;
1579
1548 mutex_lock(&mdsc->mutex); 1580 mutex_lock(&mdsc->mutex);
1549 } 1581 }
1550 1582
@@ -3391,6 +3423,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
3391 atomic_set(&mdsc->num_sessions, 0); 3423 atomic_set(&mdsc->num_sessions, 0);
3392 mdsc->max_sessions = 0; 3424 mdsc->max_sessions = 0;
3393 mdsc->stopping = 0; 3425 mdsc->stopping = 0;
3426 mdsc->last_snap_seq = 0;
3394 init_rwsem(&mdsc->snap_rwsem); 3427 init_rwsem(&mdsc->snap_rwsem);
3395 mdsc->snap_realms = RB_ROOT; 3428 mdsc->snap_realms = RB_ROOT;
3396 INIT_LIST_HEAD(&mdsc->snap_empty); 3429 INIT_LIST_HEAD(&mdsc->snap_empty);
@@ -3517,7 +3550,7 @@ restart:
3517 3550
3518void ceph_mdsc_sync(struct ceph_mds_client *mdsc) 3551void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
3519{ 3552{
3520 u64 want_tid, want_flush; 3553 u64 want_tid, want_flush, want_snap;
3521 3554
3522 if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) 3555 if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN)
3523 return; 3556 return;
@@ -3532,10 +3565,15 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
3532 want_flush = mdsc->cap_flush_seq; 3565 want_flush = mdsc->cap_flush_seq;
3533 spin_unlock(&mdsc->cap_dirty_lock); 3566 spin_unlock(&mdsc->cap_dirty_lock);
3534 3567
3535 dout("sync want tid %lld flush_seq %lld\n", want_tid, want_flush); 3568 down_read(&mdsc->snap_rwsem);
3569 want_snap = mdsc->last_snap_seq;
3570 up_read(&mdsc->snap_rwsem);
3571
3572 dout("sync want tid %lld flush_seq %lld snap_seq %lld\n",
3573 want_tid, want_flush, want_snap);
3536 3574
3537 wait_unsafe_requests(mdsc, want_tid); 3575 wait_unsafe_requests(mdsc, want_tid);
3538 wait_caps_flush(mdsc, want_flush); 3576 wait_caps_flush(mdsc, want_flush, want_snap);
3539} 3577}
3540 3578
3541/* 3579/*
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index d474141c034a..bf24d88cfeb2 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -290,6 +290,7 @@ struct ceph_mds_client {
290 * references (implying they contain no inodes with caps) that 290 * references (implying they contain no inodes with caps) that
291 * should be destroyed. 291 * should be destroyed.
292 */ 292 */
293 u64 last_snap_seq;
293 struct rw_semaphore snap_rwsem; 294 struct rw_semaphore snap_rwsem;
294 struct rb_root snap_realms; 295 struct rb_root snap_realms;
295 struct list_head snap_empty; 296 struct list_head snap_empty;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index ba708017d60b..233d906aec02 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -730,6 +730,8 @@ more:
730 730
731 /* queue realm for cap_snap creation */ 731 /* queue realm for cap_snap creation */
732 list_add(&realm->dirty_item, &dirty_realms); 732 list_add(&realm->dirty_item, &dirty_realms);
733 if (realm->seq > mdsc->last_snap_seq)
734 mdsc->last_snap_seq = realm->seq;
733 735
734 invalidate = 1; 736 invalidate = 1;
735 } else if (!realm->cached_context) { 737 } else if (!realm->cached_context) {