author    David S. Miller <davem@davemloft.net>  2015-03-03 21:16:48 -0500
committer David S. Miller <davem@davemloft.net>  2015-03-03 21:16:48 -0500
commit    71a83a6db6138b9d41d8a0b6b91cb59f6dc4742c (patch)
tree      f74b6e4e48257ec6ce40b95645ecb8533b9cc1f8 /fs/ceph/mds_client.c
parent    b97526f3ff95f92b107f0fb52cbb8627e395429b (diff)
parent    a6c5170d1edea97c538c81e377e56c7b5c5b7e63 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Conflicts:
        drivers/net/ethernet/rocker/rocker.c

The rocker conflict came from two overlapping changes: one renaming the
->vport member to ->pport, and another making the bitmask expression use
'1ULL' instead of plain '1'.

Signed-off-by: David S. Miller <davem@davemloft.net>
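The shape of that resolution can be illustrated with a small hypothetical fragment; this is not the actual rocker.c source, just a sketch showing how the field rename from one branch and the wider shift type from the other end up in the same resolved line.

/* Hypothetical illustration only -- not the real rocker.c code. */
struct example_port {
        unsigned int pport;              /* renamed from 'vport' in one branch */
};

static unsigned long long example_port_bit(const struct example_port *p)
{
        return 1ULL << p->pport;         /* '1ULL' (not plain '1') from the other branch */
}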
Diffstat (limited to 'fs/ceph/mds_client.c')
-rw-r--r--  fs/ceph/mds_client.c  127
1 file changed, 93 insertions(+), 34 deletions(-)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 5f62fb7a5d0a..71c073f38e54 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -480,6 +480,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
                 mdsc->max_sessions = newmax;
         }
         mdsc->sessions[mds] = s;
+        atomic_inc(&mdsc->num_sessions);
         atomic_inc(&s->s_ref);  /* one ref to sessions[], one to caller */
 
         ceph_con_open(&s->s_con, CEPH_ENTITY_TYPE_MDS, mds,
@@ -503,6 +504,7 @@ static void __unregister_session(struct ceph_mds_client *mdsc,
         mdsc->sessions[s->s_mds] = NULL;
         ceph_con_close(&s->s_con);
         ceph_put_mds_session(s);
+        atomic_dec(&mdsc->num_sessions);
 }
 
 /*
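The two session hunks above, together with the ceph_mdsc_init and done_closing_sessions hunks further down, replace a mutex-protected scan of mdsc->sessions[] with a dedicated atomic counter. A minimal user-space sketch of that counting pattern, using invented names rather than the kernel structures:

/* Sketch of the register/unregister counting pattern in plain C11.
 * session_table, num_sessions and MAX_SESSIONS are illustrative names. */
#include <stdatomic.h>
#include <stdbool.h>

#define MAX_SESSIONS 16

static void *session_table[MAX_SESSIONS];
static atomic_int num_sessions;

static void register_session(int mds, void *s)
{
        session_table[mds] = s;
        atomic_fetch_add(&num_sessions, 1);     /* mirrors atomic_inc() */
}

static void unregister_session(int mds)
{
        session_table[mds] = NULL;
        atomic_fetch_sub(&num_sessions, 1);     /* mirrors atomic_dec() */
}

static bool done_closing_sessions(void)
{
        /* O(1) check instead of walking session_table[] under a mutex */
        return atomic_load(&num_sessions) == 0;
}

int main(void)
{
        int dummy;

        register_session(0, &dummy);
        unregister_session(0);
        return done_closing_sessions() ? 0 : 1;
}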
@@ -842,8 +844,9 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
         struct ceph_options *opt = mdsc->fsc->client->options;
         void *p;
 
-        const char* metadata[3][2] = {
+        const char* metadata[][2] = {
                 {"hostname", utsname()->nodename},
+                {"kernel_version", utsname()->release},
                 {"entity_id", opt->name ? opt->name : ""},
                 {NULL, NULL}
         };
@@ -1464,19 +1467,33 @@ out_unlocked:
         return err;
 }
 
+static int check_cap_flush(struct inode *inode, u64 want_flush_seq)
+{
+        struct ceph_inode_info *ci = ceph_inode(inode);
+        int ret;
+        spin_lock(&ci->i_ceph_lock);
+        if (ci->i_flushing_caps)
+                ret = ci->i_cap_flush_seq >= want_flush_seq;
+        else
+                ret = 1;
+        spin_unlock(&ci->i_ceph_lock);
+        return ret;
+}
+
 /*
  * flush all dirty inode data to disk.
  *
  * returns true if we've flushed through want_flush_seq
  */
-static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
+static void wait_caps_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
 {
-        int mds, ret = 1;
+        int mds;
 
         dout("check_cap_flush want %lld\n", want_flush_seq);
         mutex_lock(&mdsc->mutex);
-        for (mds = 0; ret && mds < mdsc->max_sessions; mds++) {
+        for (mds = 0; mds < mdsc->max_sessions; mds++) {
                 struct ceph_mds_session *session = mdsc->sessions[mds];
+                struct inode *inode = NULL;
 
                 if (!session)
                         continue;
@@ -1489,29 +1506,29 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
                         list_entry(session->s_cap_flushing.next,
                                    struct ceph_inode_info,
                                    i_flushing_item);
-                        struct inode *inode = &ci->vfs_inode;
 
-                        spin_lock(&ci->i_ceph_lock);
-                        if (ci->i_cap_flush_seq <= want_flush_seq) {
+                        if (!check_cap_flush(&ci->vfs_inode, want_flush_seq)) {
                                 dout("check_cap_flush still flushing %p "
-                                     "seq %lld <= %lld to mds%d\n", inode,
-                                     ci->i_cap_flush_seq, want_flush_seq,
-                                     session->s_mds);
-                                ret = 0;
+                                     "seq %lld <= %lld to mds%d\n",
+                                     &ci->vfs_inode, ci->i_cap_flush_seq,
+                                     want_flush_seq, session->s_mds);
+                                inode = igrab(&ci->vfs_inode);
                         }
-                        spin_unlock(&ci->i_ceph_lock);
                 }
                 mutex_unlock(&session->s_mutex);
                 ceph_put_mds_session(session);
 
-                if (!ret)
-                        return ret;
+                if (inode) {
+                        wait_event(mdsc->cap_flushing_wq,
+                                   check_cap_flush(inode, want_flush_seq));
+                        iput(inode);
+                }
+
                 mutex_lock(&mdsc->mutex);
         }
 
         mutex_unlock(&mdsc->mutex);
         dout("check_cap_flush ok, flushed thru %lld\n", want_flush_seq);
-        return ret;
 }
 
 /*
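The rework above factors the per-inode check into a helper and, rather than aborting the whole scan, takes an inode reference with igrab() and waits on cap_flushing_wq after dropping the session mutex. A rough user-space analogue of that "check under the lock, wait outside it" shape, with invented names (the igrab/iput reference counting is not modeled here):

/* User-space analogue of waiting for a sequence number to advance
 * outside the lock that protects the scan; all names are illustrative. */
#include <pthread.h>
#include <stdbool.h>

struct flush_state {
        pthread_mutex_t    lock;        /* stands in for i_ceph_lock */
        pthread_cond_t     flushed;     /* stands in for cap_flushing_wq */
        unsigned long long flush_seq;   /* stands in for i_cap_flush_seq */
};

static bool flushed_through(struct flush_state *st, unsigned long long want)
{
        bool done;

        pthread_mutex_lock(&st->lock);
        done = st->flush_seq >= want;
        pthread_mutex_unlock(&st->lock);
        return done;
}

static void wait_flushed(struct flush_state *st, unsigned long long want)
{
        pthread_mutex_lock(&st->lock);
        while (st->flush_seq < want)
                pthread_cond_wait(&st->flushed, &st->lock);
        pthread_mutex_unlock(&st->lock);
}

static void mark_flushed(struct flush_state *st, unsigned long long seq)
{
        pthread_mutex_lock(&st->lock);
        if (seq > st->flush_seq)
                st->flush_seq = seq;
        pthread_cond_broadcast(&st->flushed);
        pthread_mutex_unlock(&st->lock);
}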
@@ -1923,7 +1940,11 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
         head->num_releases = cpu_to_le16(releases);
 
         /* time stamp */
-        ceph_encode_copy(&p, &req->r_stamp, sizeof(req->r_stamp));
+        {
+                struct ceph_timespec ts;
+                ceph_encode_timespec(&ts, &req->r_stamp);
+                ceph_encode_copy(&p, &ts, sizeof(ts));
+        }
 
         BUG_ON(p > end);
         msg->front.iov_len = p - msg->front.iov_base;
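This hunk (and the matching one in __prepare_send_request below) stops copying the in-kernel struct timespec into the message verbatim and first converts it with ceph_encode_timespec() into the fixed-layout wire structure. A self-contained sketch of that kind of conversion; the 32-bit little-endian sec/nsec layout is an assumption for illustration, not a statement of the exact ceph wire format:

/* Sketch: pack a host timespec into a fixed-width, little-endian wire
 * structure before memcpy'ing it into the outgoing message buffer. */
#include <stdint.h>
#include <string.h>
#include <time.h>

struct wire_timespec {
        uint32_t tv_sec;                /* little-endian on the wire */
        uint32_t tv_nsec;
} __attribute__((packed));

static uint32_t to_le32(uint32_t v)
{
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        return __builtin_bswap32(v);
#else
        return v;
#endif
}

static void encode_timespec(void **p, const struct timespec *ts)
{
        struct wire_timespec w = {
                .tv_sec  = to_le32((uint32_t)ts->tv_sec),
                .tv_nsec = to_le32((uint32_t)ts->tv_nsec),
        };

        memcpy(*p, &w, sizeof(w));      /* mirrors ceph_encode_copy() */
        *p = (char *)*p + sizeof(w);
}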
@@ -2012,7 +2033,11 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
 
         /* time stamp */
         p = msg->front.iov_base + req->r_request_release_offset;
-        ceph_encode_copy(&p, &req->r_stamp, sizeof(req->r_stamp));
+        {
+                struct ceph_timespec ts;
+                ceph_encode_timespec(&ts, &req->r_stamp);
+                ceph_encode_copy(&p, &ts, sizeof(ts));
+        }
 
         msg->front.iov_len = p - msg->front.iov_base;
         msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
@@ -2159,6 +2184,8 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds)
                 p = rb_next(p);
                 if (req->r_got_unsafe)
                         continue;
+                if (req->r_attempts > 0)
+                        continue; /* only new requests */
                 if (req->r_session &&
                     req->r_session->s_mds == mds) {
                         dout(" kicking tid %llu\n", req->r_tid);
@@ -2286,6 +2313,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
         struct ceph_mds_request *req;
         struct ceph_mds_reply_head *head = msg->front.iov_base;
         struct ceph_mds_reply_info_parsed *rinfo;  /* parsed reply info */
+        struct ceph_snap_realm *realm;
         u64 tid;
         int err, result;
         int mds = session->s_mds;
@@ -2401,11 +2429,13 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
         }
 
         /* snap trace */
+        realm = NULL;
         if (rinfo->snapblob_len) {
                 down_write(&mdsc->snap_rwsem);
                 ceph_update_snap_trace(mdsc, rinfo->snapblob,
                                        rinfo->snapblob + rinfo->snapblob_len,
-                                       le32_to_cpu(head->op) == CEPH_MDS_OP_RMSNAP);
+                                       le32_to_cpu(head->op) == CEPH_MDS_OP_RMSNAP,
+                                       &realm);
                 downgrade_write(&mdsc->snap_rwsem);
         } else {
                 down_read(&mdsc->snap_rwsem);
@@ -2423,6 +2453,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
         mutex_unlock(&req->r_fill_mutex);
 
         up_read(&mdsc->snap_rwsem);
+        if (realm)
+                ceph_put_snap_realm(mdsc, realm);
 out_err:
         mutex_lock(&mdsc->mutex);
         if (!req->r_aborted) {
@@ -2487,6 +2519,7 @@ static void handle_forward(struct ceph_mds_client *mdsc,
         dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds);
         BUG_ON(req->r_err);
         BUG_ON(req->r_got_result);
+        req->r_attempts = 0;
         req->r_num_fwd = fwd_seq;
         req->r_resend_mds = next_mds;
         put_request_session(req);
@@ -2580,6 +2613,14 @@ static void handle_session(struct ceph_mds_session *session,
                 send_flushmsg_ack(mdsc, session, seq);
                 break;
 
+        case CEPH_SESSION_FORCE_RO:
+                dout("force_session_readonly %p\n", session);
+                spin_lock(&session->s_cap_lock);
+                session->s_readonly = true;
+                spin_unlock(&session->s_cap_lock);
+                wake_up_session_caps(session, 0);
+                break;
+
         default:
                 pr_err("mdsc_handle_session bad op %d mds%d\n", op, mds);
                 WARN_ON(1);
@@ -2610,6 +2651,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
                                     struct ceph_mds_session *session)
 {
         struct ceph_mds_request *req, *nreq;
+        struct rb_node *p;
         int err;
 
         dout("replay_unsafe_requests mds%d\n", session->s_mds);
@@ -2622,6 +2664,28 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
                         ceph_con_send(&session->s_con, req->r_request);
                 }
         }
+
+        /*
+         * also re-send old requests when MDS enters reconnect stage. So that MDS
+         * can process completed request in clientreplay stage.
+         */
+        p = rb_first(&mdsc->request_tree);
+        while (p) {
+                req = rb_entry(p, struct ceph_mds_request, r_node);
+                p = rb_next(p);
+                if (req->r_got_unsafe)
+                        continue;
+                if (req->r_attempts == 0)
+                        continue; /* only old requests */
+                if (req->r_session &&
+                    req->r_session->s_mds == session->s_mds) {
+                        err = __prepare_send_request(mdsc, req, session->s_mds);
+                        if (!err) {
+                                ceph_msg_get(req->r_request);
+                                ceph_con_send(&session->s_con, req->r_request);
+                        }
+                }
+        }
         mutex_unlock(&mdsc->mutex);
 }
 
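Taken together with the kick_requests hunk earlier, this splits pending requests by r_attempts: kick_requests now skips anything that was already sent at least once, while replay_unsafe_requests re-sends exactly those already-attempted requests when the MDS reconnects. A toy sketch of that partitioning, using an invented request struct rather than ceph_mds_request:

/* Toy illustration of splitting pending requests by attempt count.
 * The struct and the sample data are invented for the example. */
#include <stdio.h>

struct pending_req {
        unsigned long long tid;
        int attempts;        /* 0 = never sent, >0 = already attempted */
        int got_unsafe;
};

/* kick path: only requests that were never sent get (re)assigned */
static int should_kick(const struct pending_req *req)
{
        return !req->got_unsafe && req->attempts == 0;
}

/* replay path: only requests that were already attempted are re-sent */
static int should_replay(const struct pending_req *req)
{
        return !req->got_unsafe && req->attempts > 0;
}

int main(void)
{
        struct pending_req reqs[] = {
                { .tid = 1, .attempts = 0 },
                { .tid = 2, .attempts = 2 },
                { .tid = 3, .attempts = 1, .got_unsafe = 1 },
        };

        for (unsigned i = 0; i < sizeof(reqs) / sizeof(reqs[0]); i++)
                printf("tid %llu: kick=%d replay=%d\n", reqs[i].tid,
                       should_kick(&reqs[i]), should_replay(&reqs[i]));
        return 0;
}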
@@ -2787,6 +2851,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
         spin_unlock(&session->s_gen_ttl_lock);
 
         spin_lock(&session->s_cap_lock);
+        /* don't know if session is readonly */
+        session->s_readonly = 0;
         /*
          * notify __ceph_remove_cap() that we are composing cap reconnect.
          * If a cap get released before being added to the cap reconnect,
@@ -2933,9 +2999,6 @@ static void check_new_map(struct ceph_mds_client *mdsc,
                                 mutex_unlock(&s->s_mutex);
                                 s->s_state = CEPH_MDS_SESSION_RESTARTING;
                         }
-
-                        /* kick any requests waiting on the recovering mds */
-                        kick_requests(mdsc, i);
                 } else if (oldstate == newstate) {
                         continue;  /* nothing new with this mds */
                 }
@@ -3295,6 +3358,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
         init_waitqueue_head(&mdsc->session_close_wq);
         INIT_LIST_HEAD(&mdsc->waiting_for_map);
         mdsc->sessions = NULL;
+        atomic_set(&mdsc->num_sessions, 0);
         mdsc->max_sessions = 0;
         mdsc->stopping = 0;
         init_rwsem(&mdsc->snap_rwsem);
@@ -3428,14 +3492,17 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
         dout("sync\n");
         mutex_lock(&mdsc->mutex);
         want_tid = mdsc->last_tid;
-        want_flush = mdsc->cap_flush_seq;
         mutex_unlock(&mdsc->mutex);
-        dout("sync want tid %lld flush_seq %lld\n", want_tid, want_flush);
 
         ceph_flush_dirty_caps(mdsc);
+        spin_lock(&mdsc->cap_dirty_lock);
+        want_flush = mdsc->cap_flush_seq;
+        spin_unlock(&mdsc->cap_dirty_lock);
+
+        dout("sync want tid %lld flush_seq %lld\n", want_tid, want_flush);
 
         wait_unsafe_requests(mdsc, want_tid);
-        wait_event(mdsc->cap_flushing_wq, check_cap_flush(mdsc, want_flush));
+        wait_caps_flush(mdsc, want_flush);
 }
 
 /*
@@ -3443,17 +3510,9 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
  */
 static bool done_closing_sessions(struct ceph_mds_client *mdsc)
 {
-        int i, n = 0;
-
         if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN)
                 return true;
-
-        mutex_lock(&mdsc->mutex);
-        for (i = 0; i < mdsc->max_sessions; i++)
-                if (mdsc->sessions[i])
-                        n++;
-        mutex_unlock(&mdsc->mutex);
-        return n == 0;
+        return atomic_read(&mdsc->num_sessions) == 0;
 }
 
 /*