author		David S. Miller <davem@davemloft.net>	2015-03-03 21:16:48 -0500
committer	David S. Miller <davem@davemloft.net>	2015-03-03 21:16:48 -0500
commit		71a83a6db6138b9d41d8a0b6b91cb59f6dc4742c
tree		f74b6e4e48257ec6ce40b95645ecb8533b9cc1f8	/fs/ceph/mds_client.c
parent		b97526f3ff95f92b107f0fb52cbb8627e395429b
parent		a6c5170d1edea97c538c81e377e56c7b5c5b7e63
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Conflicts:
drivers/net/ethernet/rocker/rocker.c
The rocker conflict was between two overlapping changes: one renaming
the ->vport member to ->pport, and another making the bitmask
expression use '1ULL' instead of plain '1'.
Signed-off-by: David S. Miller <davem@davemloft.net>
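
The '1ULL' part of that conflict matters because a plain '1' is a 32-bit int: shifting it by 32 or more positions loses the high bits of a 64-bit mask (and the shift itself is undefined behavior), while '1ULL' keeps the full 64-bit width. A minimal userspace sketch of the idea follows; the struct and field names (including the renamed ->pport) are hypothetical stand-ins, not the actual rocker.c code.

#include <stdint.h>
#include <stdio.h>

struct port {
	uint64_t pport;         /* hypothetical; stands in for the renamed ->vport -> ->pport field */
	uint64_t enabled_mask;  /* one bit per port; ports can sit above bit 31 */
};

static void port_enable(struct port *p)
{
	/* with plain '1' the shift happens in 32-bit int, so bits >= 32 are lost;
	 * '1ULL' forces a 64-bit constant and preserves the whole mask */
	p->enabled_mask |= 1ULL << p->pport;
}

int main(void)
{
	struct port p = { .pport = 40, .enabled_mask = 0 };

	port_enable(&p);
	printf("mask = 0x%llx\n", (unsigned long long)p.enabled_mask);
	return 0;
}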
Diffstat (limited to 'fs/ceph/mds_client.c')
-rw-r--r--	fs/ceph/mds_client.c	127
1 file changed, 93 insertions(+), 34 deletions(-)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 5f62fb7a5d0a..71c073f38e54 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -480,6 +480,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 		mdsc->max_sessions = newmax;
 	}
 	mdsc->sessions[mds] = s;
+	atomic_inc(&mdsc->num_sessions);
 	atomic_inc(&s->s_ref);  /* one ref to sessions[], one to caller */
 
 	ceph_con_open(&s->s_con, CEPH_ENTITY_TYPE_MDS, mds,
@@ -503,6 +504,7 @@ static void __unregister_session(struct ceph_mds_client *mdsc,
 	mdsc->sessions[s->s_mds] = NULL;
 	ceph_con_close(&s->s_con);
 	ceph_put_mds_session(s);
+	atomic_dec(&mdsc->num_sessions);
 }
 
 /*
@@ -842,8 +844,9 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
 	struct ceph_options *opt = mdsc->fsc->client->options;
 	void *p;
 
-	const char* metadata[3][2] = {
+	const char* metadata[][2] = {
 		{"hostname", utsname()->nodename},
+		{"kernel_version", utsname()->release},
 		{"entity_id", opt->name ? opt->name : ""},
 		{NULL, NULL}
 	};
@@ -1464,19 +1467,33 @@ out_unlocked:
 	return err;
 }
 
+static int check_cap_flush(struct inode *inode, u64 want_flush_seq)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	int ret;
+	spin_lock(&ci->i_ceph_lock);
+	if (ci->i_flushing_caps)
+		ret = ci->i_cap_flush_seq >= want_flush_seq;
+	else
+		ret = 1;
+	spin_unlock(&ci->i_ceph_lock);
+	return ret;
+}
+
 /*
  * flush all dirty inode data to disk.
  *
  * returns true if we've flushed through want_flush_seq
  */
-static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
+static void wait_caps_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
 {
-	int mds, ret = 1;
+	int mds;
 
 	dout("check_cap_flush want %lld\n", want_flush_seq);
 	mutex_lock(&mdsc->mutex);
-	for (mds = 0; ret && mds < mdsc->max_sessions; mds++) {
+	for (mds = 0; mds < mdsc->max_sessions; mds++) {
 		struct ceph_mds_session *session = mdsc->sessions[mds];
+		struct inode *inode = NULL;
 
 		if (!session)
 			continue;
@@ -1489,29 +1506,29 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
 			list_entry(session->s_cap_flushing.next,
 				   struct ceph_inode_info,
 				   i_flushing_item);
-			struct inode *inode = &ci->vfs_inode;
 
-			spin_lock(&ci->i_ceph_lock);
-			if (ci->i_cap_flush_seq <= want_flush_seq) {
+			if (!check_cap_flush(&ci->vfs_inode, want_flush_seq)) {
 				dout("check_cap_flush still flushing %p "
-				     "seq %lld <= %lld to mds%d\n", inode,
-				     ci->i_cap_flush_seq, want_flush_seq,
-				     session->s_mds);
-				ret = 0;
+				     "seq %lld <= %lld to mds%d\n",
+				     &ci->vfs_inode, ci->i_cap_flush_seq,
+				     want_flush_seq, session->s_mds);
+				inode = igrab(&ci->vfs_inode);
 			}
-			spin_unlock(&ci->i_ceph_lock);
 		}
 		mutex_unlock(&session->s_mutex);
 		ceph_put_mds_session(session);
 
-		if (!ret)
-			return ret;
+		if (inode) {
+			wait_event(mdsc->cap_flushing_wq,
+				   check_cap_flush(inode, want_flush_seq));
+			iput(inode);
+		}
+
 		mutex_lock(&mdsc->mutex);
 	}
 
 	mutex_unlock(&mdsc->mutex);
 	dout("check_cap_flush ok, flushed thru %lld\n", want_flush_seq);
-	return ret;
 }
 
 /*
@@ -1923,7 +1940,11 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
 	head->num_releases = cpu_to_le16(releases);
 
 	/* time stamp */
-	ceph_encode_copy(&p, &req->r_stamp, sizeof(req->r_stamp));
+	{
+		struct ceph_timespec ts;
+		ceph_encode_timespec(&ts, &req->r_stamp);
+		ceph_encode_copy(&p, &ts, sizeof(ts));
+	}
 
 	BUG_ON(p > end);
 	msg->front.iov_len = p - msg->front.iov_base;
@@ -2012,7 +2033,11 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
 
 	/* time stamp */
 	p = msg->front.iov_base + req->r_request_release_offset;
-	ceph_encode_copy(&p, &req->r_stamp, sizeof(req->r_stamp));
+	{
+		struct ceph_timespec ts;
+		ceph_encode_timespec(&ts, &req->r_stamp);
+		ceph_encode_copy(&p, &ts, sizeof(ts));
+	}
 
 	msg->front.iov_len = p - msg->front.iov_base;
 	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
@@ -2159,6 +2184,8 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds)
 		p = rb_next(p);
 		if (req->r_got_unsafe)
 			continue;
+		if (req->r_attempts > 0)
+			continue; /* only new requests */
 		if (req->r_session &&
 		    req->r_session->s_mds == mds) {
 			dout(" kicking tid %llu\n", req->r_tid);
@@ -2286,6 +2313,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 	struct ceph_mds_request *req;
 	struct ceph_mds_reply_head *head = msg->front.iov_base;
 	struct ceph_mds_reply_info_parsed *rinfo;  /* parsed reply info */
+	struct ceph_snap_realm *realm;
 	u64 tid;
 	int err, result;
 	int mds = session->s_mds;
@@ -2401,11 +2429,13 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 	}
 
 	/* snap trace */
+	realm = NULL;
 	if (rinfo->snapblob_len) {
 		down_write(&mdsc->snap_rwsem);
 		ceph_update_snap_trace(mdsc, rinfo->snapblob,
 			       rinfo->snapblob + rinfo->snapblob_len,
-			       le32_to_cpu(head->op) == CEPH_MDS_OP_RMSNAP);
+			       le32_to_cpu(head->op) == CEPH_MDS_OP_RMSNAP,
+			       &realm);
 		downgrade_write(&mdsc->snap_rwsem);
 	} else {
 		down_read(&mdsc->snap_rwsem);
@@ -2423,6 +2453,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 	mutex_unlock(&req->r_fill_mutex);
 
 	up_read(&mdsc->snap_rwsem);
+	if (realm)
+		ceph_put_snap_realm(mdsc, realm);
 out_err:
 	mutex_lock(&mdsc->mutex);
 	if (!req->r_aborted) {
@@ -2487,6 +2519,7 @@ static void handle_forward(struct ceph_mds_client *mdsc,
 	dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds);
 	BUG_ON(req->r_err);
 	BUG_ON(req->r_got_result);
+	req->r_attempts = 0;
 	req->r_num_fwd = fwd_seq;
 	req->r_resend_mds = next_mds;
 	put_request_session(req);
@@ -2580,6 +2613,14 @@ static void handle_session(struct ceph_mds_session *session,
 		send_flushmsg_ack(mdsc, session, seq);
 		break;
 
+	case CEPH_SESSION_FORCE_RO:
+		dout("force_session_readonly %p\n", session);
+		spin_lock(&session->s_cap_lock);
+		session->s_readonly = true;
+		spin_unlock(&session->s_cap_lock);
+		wake_up_session_caps(session, 0);
+		break;
+
 	default:
 		pr_err("mdsc_handle_session bad op %d mds%d\n", op, mds);
 		WARN_ON(1);
@@ -2610,6 +2651,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
 				   struct ceph_mds_session *session)
 {
 	struct ceph_mds_request *req, *nreq;
+	struct rb_node *p;
 	int err;
 
 	dout("replay_unsafe_requests mds%d\n", session->s_mds);
@@ -2622,6 +2664,28 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
 			ceph_con_send(&session->s_con, req->r_request);
 		}
 	}
+
+	/*
+	 * also re-send old requests when MDS enters reconnect stage. So that MDS
+	 * can process completed request in clientreplay stage.
+	 */
+	p = rb_first(&mdsc->request_tree);
+	while (p) {
+		req = rb_entry(p, struct ceph_mds_request, r_node);
+		p = rb_next(p);
+		if (req->r_got_unsafe)
+			continue;
+		if (req->r_attempts == 0)
+			continue; /* only old requests */
+		if (req->r_session &&
+		    req->r_session->s_mds == session->s_mds) {
+			err = __prepare_send_request(mdsc, req, session->s_mds);
+			if (!err) {
+				ceph_msg_get(req->r_request);
+				ceph_con_send(&session->s_con, req->r_request);
+			}
+		}
+	}
 	mutex_unlock(&mdsc->mutex);
 }
 
@@ -2787,6 +2851,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
 	spin_unlock(&session->s_gen_ttl_lock);
 
 	spin_lock(&session->s_cap_lock);
+	/* don't know if session is readonly */
+	session->s_readonly = 0;
 	/*
 	 * notify __ceph_remove_cap() that we are composing cap reconnect.
 	 * If a cap get released before being added to the cap reconnect,
@@ -2933,9 +2999,6 @@ static void check_new_map(struct ceph_mds_client *mdsc,
 				mutex_unlock(&s->s_mutex);
 				s->s_state = CEPH_MDS_SESSION_RESTARTING;
 			}
-
-			/* kick any requests waiting on the recovering mds */
-			kick_requests(mdsc, i);
 		} else if (oldstate == newstate) {
 			continue;  /* nothing new with this mds */
 		}
@@ -3295,6 +3358,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
 	init_waitqueue_head(&mdsc->session_close_wq);
 	INIT_LIST_HEAD(&mdsc->waiting_for_map);
 	mdsc->sessions = NULL;
+	atomic_set(&mdsc->num_sessions, 0);
 	mdsc->max_sessions = 0;
 	mdsc->stopping = 0;
 	init_rwsem(&mdsc->snap_rwsem);
@@ -3428,14 +3492,17 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
 	dout("sync\n");
 	mutex_lock(&mdsc->mutex);
 	want_tid = mdsc->last_tid;
-	want_flush = mdsc->cap_flush_seq;
 	mutex_unlock(&mdsc->mutex);
-	dout("sync want tid %lld flush_seq %lld\n", want_tid, want_flush);
 
 	ceph_flush_dirty_caps(mdsc);
+	spin_lock(&mdsc->cap_dirty_lock);
+	want_flush = mdsc->cap_flush_seq;
+	spin_unlock(&mdsc->cap_dirty_lock);
+
+	dout("sync want tid %lld flush_seq %lld\n", want_tid, want_flush);
 
 	wait_unsafe_requests(mdsc, want_tid);
-	wait_event(mdsc->cap_flushing_wq, check_cap_flush(mdsc, want_flush));
+	wait_caps_flush(mdsc, want_flush);
 }
 
 /*
@@ -3443,17 +3510,9 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
  */
 static bool done_closing_sessions(struct ceph_mds_client *mdsc)
 {
-	int i, n = 0;
-
 	if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN)
 		return true;
-
-	mutex_lock(&mdsc->mutex);
-	for (i = 0; i < mdsc->max_sessions; i++)
-		if (mdsc->sessions[i])
-			n++;
-	mutex_unlock(&mdsc->mutex);
-	return n == 0;
+	return atomic_read(&mdsc->num_sessions) == 0;
 }
 
 /*
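
One pattern worth noting in the hunks above is the new mdsc->num_sessions counter: register_session() and __unregister_session() bump an atomic counter, so done_closing_sessions() no longer has to take the mutex and walk the sessions array just to see whether any session is left. A standalone C11 sketch of that pattern follows; the names are hypothetical and stdatomic.h stands in for the kernel's atomic_t, so this is an illustration of the idea rather than the ceph code itself.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct client {
	atomic_int num_sessions;   /* stands in for mdsc->num_sessions */
};

static void register_session(struct client *c)
{
	/* ...insert the session into the array under the client mutex... */
	atomic_fetch_add(&c->num_sessions, 1);
}

static void unregister_session(struct client *c)
{
	/* ...remove the session from the array under the client mutex... */
	atomic_fetch_sub(&c->num_sessions, 1);
}

static bool done_closing_sessions(struct client *c)
{
	/* a lock-free read replaces the old "count the non-NULL slots" loop */
	return atomic_load(&c->num_sessions) == 0;
}

int main(void)
{
	struct client c;

	atomic_init(&c.num_sessions, 0);
	register_session(&c);
	unregister_session(&c);
	printf("done closing: %d\n", done_closing_sessions(&c));
	return 0;
}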