diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-11 15:33:03 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-11 15:33:03 -0400 |
| commit | e013f74b60bbd37ee8c3a55214eb351ea3101c15 (patch) | |
| tree | 096b59f550dea6df9347edf97b872dc75a79f653 | |
| parent | 01cab5549c3e9a0fe7248fc5ad0fd79361cc0d39 (diff) | |
| parent | 438386853d4c0c48fe73bf05a7d61c70ca5a3bfb (diff) | |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph update from Sage Weil:
"There are a few fixes for snapshot behavior with CephFS and support
for the new keepalive protocol from Zheng, a libceph fix that affects
both RBD and CephFS, a few bug fixes and cleanups for RBD from Ilya,
and several small fixes and cleanups from Jianpeng and others"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
ceph: improve readahead for file holes
ceph: get inode size for each append write
libceph: check data_len in ->alloc_msg()
libceph: use keepalive2 to verify the mon session is alive
rbd: plug rbd_dev->header.object_prefix memory leak
rbd: fix double free on rbd_dev->header_name
libceph: set 'exists' flag for newly up osd
ceph: cleanup use of ceph_msg_get
ceph: no need to get parent inode in ceph_open
ceph: remove the useless judgement
ceph: remove redundant test of head->safe and silence static analysis warnings
ceph: fix queuing inode to mdsdir's snaprealm
libceph: rename con_work() to ceph_con_workfn()
libceph: Avoid holding the zero page on ceph_msgr_slab_init errors
libceph: remove the unused macro AES_KEY_SIZE
ceph: invalidate dirty pages after forced umount
ceph: EIO all operations after forced umount
| -rw-r--r-- | drivers/block/rbd.c | 6 | ||||
| -rw-r--r-- | fs/ceph/addr.c | 6 | ||||
| -rw-r--r-- | fs/ceph/caps.c | 8 | ||||
| -rw-r--r-- | fs/ceph/file.c | 14 | ||||
| -rw-r--r-- | fs/ceph/mds_client.c | 59 | ||||
| -rw-r--r-- | fs/ceph/mds_client.h | 1 | ||||
| -rw-r--r-- | fs/ceph/snap.c | 7 | ||||
| -rw-r--r-- | fs/ceph/super.c | 1 | ||||
| -rw-r--r-- | include/linux/ceph/libceph.h | 2 | ||||
| -rw-r--r-- | include/linux/ceph/messenger.h | 4 | ||||
| -rw-r--r-- | include/linux/ceph/msgr.h | 4 | ||||
| -rw-r--r-- | net/ceph/ceph_common.c | 1 | ||||
| -rw-r--r-- | net/ceph/crypto.c | 4 | ||||
| -rw-r--r-- | net/ceph/messenger.c | 82 | ||||
| -rw-r--r-- | net/ceph/mon_client.c | 37 | ||||
| -rw-r--r-- | net/ceph/osd_client.c | 51 | ||||
| -rw-r--r-- | net/ceph/osdmap.c | 2 |
17 files changed, 191 insertions, 98 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 698f761037ce..d93a0372b37b 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c | |||
| @@ -4673,7 +4673,10 @@ static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev) | |||
| 4673 | } | 4673 | } |
| 4674 | 4674 | ||
| 4675 | ret = rbd_dev_v2_snap_context(rbd_dev); | 4675 | ret = rbd_dev_v2_snap_context(rbd_dev); |
| 4676 | dout("rbd_dev_v2_snap_context returned %d\n", ret); | 4676 | if (ret && first_time) { |
| 4677 | kfree(rbd_dev->header.object_prefix); | ||
| 4678 | rbd_dev->header.object_prefix = NULL; | ||
| 4679 | } | ||
| 4677 | 4680 | ||
| 4678 | return ret; | 4681 | return ret; |
| 4679 | } | 4682 | } |
| @@ -5154,7 +5157,6 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev) | |||
| 5154 | out_err: | 5157 | out_err: |
| 5155 | if (parent) { | 5158 | if (parent) { |
| 5156 | rbd_dev_unparent(rbd_dev); | 5159 | rbd_dev_unparent(rbd_dev); |
| 5157 | kfree(rbd_dev->header_name); | ||
| 5158 | rbd_dev_destroy(parent); | 5160 | rbd_dev_destroy(parent); |
| 5159 | } else { | 5161 | } else { |
| 5160 | rbd_put_client(rbdc); | 5162 | rbd_put_client(rbdc); |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index a268abfe60ac..9d23e788d1df 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
| @@ -276,7 +276,7 @@ static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg) | |||
| 276 | for (i = 0; i < num_pages; i++) { | 276 | for (i = 0; i < num_pages; i++) { |
| 277 | struct page *page = osd_data->pages[i]; | 277 | struct page *page = osd_data->pages[i]; |
| 278 | 278 | ||
| 279 | if (rc < 0) | 279 | if (rc < 0 && rc != ENOENT) |
| 280 | goto unlock; | 280 | goto unlock; |
| 281 | if (bytes < (int)PAGE_CACHE_SIZE) { | 281 | if (bytes < (int)PAGE_CACHE_SIZE) { |
| 282 | /* zero (remainder of) page */ | 282 | /* zero (remainder of) page */ |
| @@ -717,8 +717,10 @@ static int ceph_writepages_start(struct address_space *mapping, | |||
| 717 | wbc->sync_mode == WB_SYNC_NONE ? "NONE" : | 717 | wbc->sync_mode == WB_SYNC_NONE ? "NONE" : |
| 718 | (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); | 718 | (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); |
| 719 | 719 | ||
| 720 | if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { | 720 | if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { |
| 721 | pr_warn("writepage_start %p on forced umount\n", inode); | 721 | pr_warn("writepage_start %p on forced umount\n", inode); |
| 722 | truncate_pagecache(inode, 0); | ||
| 723 | mapping_set_error(mapping, -EIO); | ||
| 722 | return -EIO; /* we're in a forced umount, don't write! */ | 724 | return -EIO; /* we're in a forced umount, don't write! */ |
| 723 | } | 725 | } |
| 724 | if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize) | 726 | if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize) |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index ddd5e9471290..27b566874bc1 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
| @@ -2413,6 +2413,14 @@ again: | |||
| 2413 | goto out_unlock; | 2413 | goto out_unlock; |
| 2414 | } | 2414 | } |
| 2415 | 2415 | ||
| 2416 | if (!__ceph_is_any_caps(ci) && | ||
| 2417 | ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { | ||
| 2418 | dout("get_cap_refs %p forced umount\n", inode); | ||
| 2419 | *err = -EIO; | ||
| 2420 | ret = 1; | ||
| 2421 | goto out_unlock; | ||
| 2422 | } | ||
| 2423 | |||
| 2416 | dout("get_cap_refs %p have %s needed %s\n", inode, | 2424 | dout("get_cap_refs %p have %s needed %s\n", inode, |
| 2417 | ceph_cap_string(have), ceph_cap_string(need)); | 2425 | ceph_cap_string(have), ceph_cap_string(need)); |
| 2418 | } | 2426 | } |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 8b79d87eaf46..0c62868b5c56 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
| @@ -136,7 +136,6 @@ int ceph_open(struct inode *inode, struct file *file) | |||
| 136 | struct ceph_mds_client *mdsc = fsc->mdsc; | 136 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 137 | struct ceph_mds_request *req; | 137 | struct ceph_mds_request *req; |
| 138 | struct ceph_file_info *cf = file->private_data; | 138 | struct ceph_file_info *cf = file->private_data; |
| 139 | struct inode *parent_inode = NULL; | ||
| 140 | int err; | 139 | int err; |
| 141 | int flags, fmode, wanted; | 140 | int flags, fmode, wanted; |
| 142 | 141 | ||
| @@ -210,10 +209,7 @@ int ceph_open(struct inode *inode, struct file *file) | |||
| 210 | ihold(inode); | 209 | ihold(inode); |
| 211 | 210 | ||
| 212 | req->r_num_caps = 1; | 211 | req->r_num_caps = 1; |
| 213 | if (flags & O_CREAT) | 212 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
| 214 | parent_inode = ceph_get_dentry_parent_inode(file->f_path.dentry); | ||
| 215 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); | ||
| 216 | iput(parent_inode); | ||
| 217 | if (!err) | 213 | if (!err) |
| 218 | err = ceph_init_file(inode, file, req->r_fmode); | 214 | err = ceph_init_file(inode, file, req->r_fmode); |
| 219 | ceph_mdsc_put_request(req); | 215 | ceph_mdsc_put_request(req); |
| @@ -279,7 +275,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, | |||
| 279 | if (err) | 275 | if (err) |
| 280 | goto out_req; | 276 | goto out_req; |
| 281 | 277 | ||
| 282 | if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry) | 278 | if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry) |
| 283 | err = ceph_handle_notrace_create(dir, dentry); | 279 | err = ceph_handle_notrace_create(dir, dentry); |
| 284 | 280 | ||
| 285 | if (d_unhashed(dentry)) { | 281 | if (d_unhashed(dentry)) { |
| @@ -956,6 +952,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) | |||
| 956 | /* We can write back this queue in page reclaim */ | 952 | /* We can write back this queue in page reclaim */ |
| 957 | current->backing_dev_info = inode_to_bdi(inode); | 953 | current->backing_dev_info = inode_to_bdi(inode); |
| 958 | 954 | ||
| 955 | if (iocb->ki_flags & IOCB_APPEND) { | ||
| 956 | err = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false); | ||
| 957 | if (err < 0) | ||
| 958 | goto out; | ||
| 959 | } | ||
| 960 | |||
| 959 | err = generic_write_checks(iocb, from); | 961 | err = generic_write_checks(iocb, from); |
| 960 | if (err <= 0) | 962 | if (err <= 0) |
| 961 | goto out; | 963 | goto out; |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 6aa07af67603..51cb02da75d9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
| @@ -2107,7 +2107,6 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, | |||
| 2107 | msg = create_request_message(mdsc, req, mds, drop_cap_releases); | 2107 | msg = create_request_message(mdsc, req, mds, drop_cap_releases); |
| 2108 | if (IS_ERR(msg)) { | 2108 | if (IS_ERR(msg)) { |
| 2109 | req->r_err = PTR_ERR(msg); | 2109 | req->r_err = PTR_ERR(msg); |
| 2110 | complete_request(mdsc, req); | ||
| 2111 | return PTR_ERR(msg); | 2110 | return PTR_ERR(msg); |
| 2112 | } | 2111 | } |
| 2113 | req->r_request = msg; | 2112 | req->r_request = msg; |
| @@ -2135,7 +2134,7 @@ static int __do_request(struct ceph_mds_client *mdsc, | |||
| 2135 | { | 2134 | { |
| 2136 | struct ceph_mds_session *session = NULL; | 2135 | struct ceph_mds_session *session = NULL; |
| 2137 | int mds = -1; | 2136 | int mds = -1; |
| 2138 | int err = -EAGAIN; | 2137 | int err = 0; |
| 2139 | 2138 | ||
| 2140 | if (req->r_err || req->r_got_result) { | 2139 | if (req->r_err || req->r_got_result) { |
| 2141 | if (req->r_aborted) | 2140 | if (req->r_aborted) |
| @@ -2149,6 +2148,11 @@ static int __do_request(struct ceph_mds_client *mdsc, | |||
| 2149 | err = -EIO; | 2148 | err = -EIO; |
| 2150 | goto finish; | 2149 | goto finish; |
| 2151 | } | 2150 | } |
| 2151 | if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { | ||
| 2152 | dout("do_request forced umount\n"); | ||
| 2153 | err = -EIO; | ||
| 2154 | goto finish; | ||
| 2155 | } | ||
| 2152 | 2156 | ||
| 2153 | put_request_session(req); | 2157 | put_request_session(req); |
| 2154 | 2158 | ||
| @@ -2196,13 +2200,15 @@ static int __do_request(struct ceph_mds_client *mdsc, | |||
| 2196 | 2200 | ||
| 2197 | out_session: | 2201 | out_session: |
| 2198 | ceph_put_mds_session(session); | 2202 | ceph_put_mds_session(session); |
| 2203 | finish: | ||
| 2204 | if (err) { | ||
| 2205 | dout("__do_request early error %d\n", err); | ||
| 2206 | req->r_err = err; | ||
| 2207 | complete_request(mdsc, req); | ||
| 2208 | __unregister_request(mdsc, req); | ||
| 2209 | } | ||
| 2199 | out: | 2210 | out: |
| 2200 | return err; | 2211 | return err; |
| 2201 | |||
| 2202 | finish: | ||
| 2203 | req->r_err = err; | ||
| 2204 | complete_request(mdsc, req); | ||
| 2205 | goto out; | ||
| 2206 | } | 2212 | } |
| 2207 | 2213 | ||
| 2208 | /* | 2214 | /* |
| @@ -2289,8 +2295,6 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, | |||
| 2289 | 2295 | ||
| 2290 | if (req->r_err) { | 2296 | if (req->r_err) { |
| 2291 | err = req->r_err; | 2297 | err = req->r_err; |
| 2292 | __unregister_request(mdsc, req); | ||
| 2293 | dout("do_request early error %d\n", err); | ||
| 2294 | goto out; | 2298 | goto out; |
| 2295 | } | 2299 | } |
| 2296 | 2300 | ||
| @@ -2411,7 +2415,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
| 2411 | mutex_unlock(&mdsc->mutex); | 2415 | mutex_unlock(&mdsc->mutex); |
| 2412 | goto out; | 2416 | goto out; |
| 2413 | } | 2417 | } |
| 2414 | if (req->r_got_safe && !head->safe) { | 2418 | if (req->r_got_safe) { |
| 2415 | pr_warn("got unsafe after safe on %llu from mds%d\n", | 2419 | pr_warn("got unsafe after safe on %llu from mds%d\n", |
| 2416 | tid, mds); | 2420 | tid, mds); |
| 2417 | mutex_unlock(&mdsc->mutex); | 2421 | mutex_unlock(&mdsc->mutex); |
| @@ -2520,8 +2524,7 @@ out_err: | |||
| 2520 | if (err) { | 2524 | if (err) { |
| 2521 | req->r_err = err; | 2525 | req->r_err = err; |
| 2522 | } else { | 2526 | } else { |
| 2523 | req->r_reply = msg; | 2527 | req->r_reply = ceph_msg_get(msg); |
| 2524 | ceph_msg_get(msg); | ||
| 2525 | req->r_got_result = true; | 2528 | req->r_got_result = true; |
| 2526 | } | 2529 | } |
| 2527 | } else { | 2530 | } else { |
| @@ -3555,7 +3558,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) | |||
| 3555 | { | 3558 | { |
| 3556 | u64 want_tid, want_flush, want_snap; | 3559 | u64 want_tid, want_flush, want_snap; |
| 3557 | 3560 | ||
| 3558 | if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) | 3561 | if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) |
| 3559 | return; | 3562 | return; |
| 3560 | 3563 | ||
| 3561 | dout("sync\n"); | 3564 | dout("sync\n"); |
| @@ -3584,7 +3587,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) | |||
| 3584 | */ | 3587 | */ |
| 3585 | static bool done_closing_sessions(struct ceph_mds_client *mdsc) | 3588 | static bool done_closing_sessions(struct ceph_mds_client *mdsc) |
| 3586 | { | 3589 | { |
| 3587 | if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) | 3590 | if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) |
| 3588 | return true; | 3591 | return true; |
| 3589 | return atomic_read(&mdsc->num_sessions) == 0; | 3592 | return atomic_read(&mdsc->num_sessions) == 0; |
| 3590 | } | 3593 | } |
| @@ -3643,6 +3646,34 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) | |||
| 3643 | dout("stopped\n"); | 3646 | dout("stopped\n"); |
| 3644 | } | 3647 | } |
| 3645 | 3648 | ||
| 3649 | void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc) | ||
| 3650 | { | ||
| 3651 | struct ceph_mds_session *session; | ||
| 3652 | int mds; | ||
| 3653 | |||
| 3654 | dout("force umount\n"); | ||
| 3655 | |||
| 3656 | mutex_lock(&mdsc->mutex); | ||
| 3657 | for (mds = 0; mds < mdsc->max_sessions; mds++) { | ||
| 3658 | session = __ceph_lookup_mds_session(mdsc, mds); | ||
| 3659 | if (!session) | ||
| 3660 | continue; | ||
| 3661 | mutex_unlock(&mdsc->mutex); | ||
| 3662 | mutex_lock(&session->s_mutex); | ||
| 3663 | __close_session(mdsc, session); | ||
| 3664 | if (session->s_state == CEPH_MDS_SESSION_CLOSING) { | ||
| 3665 | cleanup_session_requests(mdsc, session); | ||
| 3666 | remove_session_caps(session); | ||
| 3667 | } | ||
| 3668 | mutex_unlock(&session->s_mutex); | ||
| 3669 | ceph_put_mds_session(session); | ||
| 3670 | mutex_lock(&mdsc->mutex); | ||
| 3671 | kick_requests(mdsc, mds); | ||
| 3672 | } | ||
| 3673 | __wake_requests(mdsc, &mdsc->waiting_for_map); | ||
| 3674 | mutex_unlock(&mdsc->mutex); | ||
| 3675 | } | ||
| 3676 | |||
| 3646 | static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) | 3677 | static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) |
| 3647 | { | 3678 | { |
| 3648 | dout("stop\n"); | 3679 | dout("stop\n"); |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 762757e6cebf..f575eafe2261 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
| @@ -366,6 +366,7 @@ extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc, | |||
| 366 | 366 | ||
| 367 | extern int ceph_mdsc_init(struct ceph_fs_client *fsc); | 367 | extern int ceph_mdsc_init(struct ceph_fs_client *fsc); |
| 368 | extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc); | 368 | extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc); |
| 369 | extern void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc); | ||
| 369 | extern void ceph_mdsc_destroy(struct ceph_fs_client *fsc); | 370 | extern void ceph_mdsc_destroy(struct ceph_fs_client *fsc); |
| 370 | 371 | ||
| 371 | extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc); | 372 | extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc); |
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 233d906aec02..4aa7122a8d38 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c | |||
| @@ -338,12 +338,6 @@ static int build_snap_context(struct ceph_snap_realm *realm) | |||
| 338 | return 0; | 338 | return 0; |
| 339 | } | 339 | } |
| 340 | 340 | ||
| 341 | if (num == 0 && realm->seq == ceph_empty_snapc->seq) { | ||
| 342 | ceph_get_snap_context(ceph_empty_snapc); | ||
| 343 | snapc = ceph_empty_snapc; | ||
| 344 | goto done; | ||
| 345 | } | ||
| 346 | |||
| 347 | /* alloc new snap context */ | 341 | /* alloc new snap context */ |
| 348 | err = -ENOMEM; | 342 | err = -ENOMEM; |
| 349 | if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64)) | 343 | if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64)) |
| @@ -381,7 +375,6 @@ static int build_snap_context(struct ceph_snap_realm *realm) | |||
| 381 | realm->ino, realm, snapc, snapc->seq, | 375 | realm->ino, realm, snapc, snapc->seq, |
| 382 | (unsigned int) snapc->num_snaps); | 376 | (unsigned int) snapc->num_snaps); |
| 383 | 377 | ||
| 384 | done: | ||
| 385 | ceph_put_snap_context(realm->cached_context); | 378 | ceph_put_snap_context(realm->cached_context); |
| 386 | realm->cached_context = snapc; | 379 | realm->cached_context = snapc; |
| 387 | return 0; | 380 | return 0; |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 7b6bfcbf801c..f446afada328 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
| @@ -708,6 +708,7 @@ static void ceph_umount_begin(struct super_block *sb) | |||
| 708 | if (!fsc) | 708 | if (!fsc) |
| 709 | return; | 709 | return; |
| 710 | fsc->mount_state = CEPH_MOUNT_SHUTDOWN; | 710 | fsc->mount_state = CEPH_MOUNT_SHUTDOWN; |
| 711 | ceph_mdsc_force_umount(fsc->mdsc); | ||
| 711 | return; | 712 | return; |
| 712 | } | 713 | } |
| 713 | 714 | ||
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 9ebee53d3bf5..397c5cd09794 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h | |||
| @@ -46,6 +46,7 @@ struct ceph_options { | |||
| 46 | unsigned long mount_timeout; /* jiffies */ | 46 | unsigned long mount_timeout; /* jiffies */ |
| 47 | unsigned long osd_idle_ttl; /* jiffies */ | 47 | unsigned long osd_idle_ttl; /* jiffies */ |
| 48 | unsigned long osd_keepalive_timeout; /* jiffies */ | 48 | unsigned long osd_keepalive_timeout; /* jiffies */ |
| 49 | unsigned long monc_ping_timeout; /* jiffies */ | ||
| 49 | 50 | ||
| 50 | /* | 51 | /* |
| 51 | * any type that can't be simply compared or doesn't need need | 52 | * any type that can't be simply compared or doesn't need need |
| @@ -66,6 +67,7 @@ struct ceph_options { | |||
| 66 | #define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) | 67 | #define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) |
| 67 | #define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) | 68 | #define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) |
| 68 | #define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) | 69 | #define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) |
| 70 | #define CEPH_MONC_PING_TIMEOUT_DEFAULT msecs_to_jiffies(30 * 1000) | ||
| 69 | 71 | ||
| 70 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) | 72 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) |
| 71 | #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) | 73 | #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) |
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 37753278987a..7e1252e97a30 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h | |||
| @@ -248,6 +248,8 @@ struct ceph_connection { | |||
| 248 | int in_base_pos; /* bytes read */ | 248 | int in_base_pos; /* bytes read */ |
| 249 | __le64 in_temp_ack; /* for reading an ack */ | 249 | __le64 in_temp_ack; /* for reading an ack */ |
| 250 | 250 | ||
| 251 | struct timespec last_keepalive_ack; | ||
| 252 | |||
| 251 | struct delayed_work work; /* send|recv work */ | 253 | struct delayed_work work; /* send|recv work */ |
| 252 | unsigned long delay; /* current delay interval */ | 254 | unsigned long delay; /* current delay interval */ |
| 253 | }; | 255 | }; |
| @@ -285,6 +287,8 @@ extern void ceph_msg_revoke(struct ceph_msg *msg); | |||
| 285 | extern void ceph_msg_revoke_incoming(struct ceph_msg *msg); | 287 | extern void ceph_msg_revoke_incoming(struct ceph_msg *msg); |
| 286 | 288 | ||
| 287 | extern void ceph_con_keepalive(struct ceph_connection *con); | 289 | extern void ceph_con_keepalive(struct ceph_connection *con); |
| 290 | extern bool ceph_con_keepalive_expired(struct ceph_connection *con, | ||
| 291 | unsigned long interval); | ||
| 288 | 292 | ||
| 289 | extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, | 293 | extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, |
| 290 | size_t length, size_t alignment); | 294 | size_t length, size_t alignment); |
diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h index 1c1887206ffa..0fe2656ac415 100644 --- a/include/linux/ceph/msgr.h +++ b/include/linux/ceph/msgr.h | |||
| @@ -84,10 +84,12 @@ struct ceph_entity_inst { | |||
| 84 | #define CEPH_MSGR_TAG_MSG 7 /* message */ | 84 | #define CEPH_MSGR_TAG_MSG 7 /* message */ |
| 85 | #define CEPH_MSGR_TAG_ACK 8 /* message ack */ | 85 | #define CEPH_MSGR_TAG_ACK 8 /* message ack */ |
| 86 | #define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! */ | 86 | #define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! */ |
| 87 | #define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */ | 87 | #define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */ |
| 88 | #define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */ | 88 | #define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */ |
| 89 | #define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */ | 89 | #define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */ |
| 90 | #define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */ | 90 | #define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */ |
| 91 | #define CEPH_MSGR_TAG_KEEPALIVE2 14 /* keepalive2 byte + ceph_timespec */ | ||
| 92 | #define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */ | ||
| 91 | 93 | ||
| 92 | 94 | ||
| 93 | /* | 95 | /* |
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 69a4d30a9ccf..54a00d66509e 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c | |||
| @@ -357,6 +357,7 @@ ceph_parse_options(char *options, const char *dev_name, | |||
| 357 | opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; | 357 | opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; |
| 358 | opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; | 358 | opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; |
| 359 | opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; | 359 | opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; |
| 360 | opt->monc_ping_timeout = CEPH_MONC_PING_TIMEOUT_DEFAULT; | ||
| 360 | 361 | ||
| 361 | /* get mon ip(s) */ | 362 | /* get mon ip(s) */ |
| 362 | /* ip1[:port1][,ip2[:port2]...] */ | 363 | /* ip1[:port1][,ip2[:port2]...] */ |
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 790fe89d90c0..4440edcce0d6 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c | |||
| @@ -79,10 +79,6 @@ int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *inkey) | |||
| 79 | return 0; | 79 | return 0; |
| 80 | } | 80 | } |
| 81 | 81 | ||
| 82 | |||
| 83 | |||
| 84 | #define AES_KEY_SIZE 16 | ||
| 85 | |||
| 86 | static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void) | 82 | static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void) |
| 87 | { | 83 | { |
| 88 | return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); | 84 | return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); |
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index e3be1d22a247..525f454f7531 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c | |||
| @@ -163,6 +163,7 @@ static struct kmem_cache *ceph_msg_data_cache; | |||
| 163 | static char tag_msg = CEPH_MSGR_TAG_MSG; | 163 | static char tag_msg = CEPH_MSGR_TAG_MSG; |
| 164 | static char tag_ack = CEPH_MSGR_TAG_ACK; | 164 | static char tag_ack = CEPH_MSGR_TAG_ACK; |
| 165 | static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; | 165 | static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; |
| 166 | static char tag_keepalive2 = CEPH_MSGR_TAG_KEEPALIVE2; | ||
| 166 | 167 | ||
| 167 | #ifdef CONFIG_LOCKDEP | 168 | #ifdef CONFIG_LOCKDEP |
| 168 | static struct lock_class_key socket_class; | 169 | static struct lock_class_key socket_class; |
| @@ -176,7 +177,7 @@ static struct lock_class_key socket_class; | |||
| 176 | 177 | ||
| 177 | static void queue_con(struct ceph_connection *con); | 178 | static void queue_con(struct ceph_connection *con); |
| 178 | static void cancel_con(struct ceph_connection *con); | 179 | static void cancel_con(struct ceph_connection *con); |
| 179 | static void con_work(struct work_struct *); | 180 | static void ceph_con_workfn(struct work_struct *); |
| 180 | static void con_fault(struct ceph_connection *con); | 181 | static void con_fault(struct ceph_connection *con); |
| 181 | 182 | ||
| 182 | /* | 183 | /* |
| @@ -276,22 +277,22 @@ static void _ceph_msgr_exit(void) | |||
| 276 | ceph_msgr_wq = NULL; | 277 | ceph_msgr_wq = NULL; |
| 277 | } | 278 | } |
| 278 | 279 | ||
| 279 | ceph_msgr_slab_exit(); | ||
| 280 | |||
| 281 | BUG_ON(zero_page == NULL); | 280 | BUG_ON(zero_page == NULL); |
| 282 | page_cache_release(zero_page); | 281 | page_cache_release(zero_page); |
| 283 | zero_page = NULL; | 282 | zero_page = NULL; |
| 283 | |||
| 284 | ceph_msgr_slab_exit(); | ||
| 284 | } | 285 | } |
| 285 | 286 | ||
| 286 | int ceph_msgr_init(void) | 287 | int ceph_msgr_init(void) |
| 287 | { | 288 | { |
| 289 | if (ceph_msgr_slab_init()) | ||
| 290 | return -ENOMEM; | ||
| 291 | |||
| 288 | BUG_ON(zero_page != NULL); | 292 | BUG_ON(zero_page != NULL); |
| 289 | zero_page = ZERO_PAGE(0); | 293 | zero_page = ZERO_PAGE(0); |
| 290 | page_cache_get(zero_page); | 294 | page_cache_get(zero_page); |
| 291 | 295 | ||
| 292 | if (ceph_msgr_slab_init()) | ||
| 293 | return -ENOMEM; | ||
| 294 | |||
| 295 | /* | 296 | /* |
| 296 | * The number of active work items is limited by the number of | 297 | * The number of active work items is limited by the number of |
| 297 | * connections, so leave @max_active at default. | 298 | * connections, so leave @max_active at default. |
| @@ -749,7 +750,7 @@ void ceph_con_init(struct ceph_connection *con, void *private, | |||
| 749 | mutex_init(&con->mutex); | 750 | mutex_init(&con->mutex); |
| 750 | INIT_LIST_HEAD(&con->out_queue); | 751 | INIT_LIST_HEAD(&con->out_queue); |
| 751 | INIT_LIST_HEAD(&con->out_sent); | 752 | INIT_LIST_HEAD(&con->out_sent); |
| 752 | INIT_DELAYED_WORK(&con->work, con_work); | 753 | INIT_DELAYED_WORK(&con->work, ceph_con_workfn); |
| 753 | 754 | ||
| 754 | con->state = CON_STATE_CLOSED; | 755 | con->state = CON_STATE_CLOSED; |
| 755 | } | 756 | } |
| @@ -1351,7 +1352,15 @@ static void prepare_write_keepalive(struct ceph_connection *con) | |||
| 1351 | { | 1352 | { |
| 1352 | dout("prepare_write_keepalive %p\n", con); | 1353 | dout("prepare_write_keepalive %p\n", con); |
| 1353 | con_out_kvec_reset(con); | 1354 | con_out_kvec_reset(con); |
| 1354 | con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive); | 1355 | if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) { |
| 1356 | struct timespec ts = CURRENT_TIME; | ||
| 1357 | struct ceph_timespec ceph_ts; | ||
| 1358 | ceph_encode_timespec(&ceph_ts, &ts); | ||
| 1359 | con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2); | ||
| 1360 | con_out_kvec_add(con, sizeof(ceph_ts), &ceph_ts); | ||
| 1361 | } else { | ||
| 1362 | con_out_kvec_add(con, sizeof(tag_keepalive), &tag_keepalive); | ||
| 1363 | } | ||
| 1355 | con_flag_set(con, CON_FLAG_WRITE_PENDING); | 1364 | con_flag_set(con, CON_FLAG_WRITE_PENDING); |
| 1356 | } | 1365 | } |
| 1357 | 1366 | ||
| @@ -1625,6 +1634,12 @@ static void prepare_read_tag(struct ceph_connection *con) | |||
| 1625 | con->in_tag = CEPH_MSGR_TAG_READY; | 1634 | con->in_tag = CEPH_MSGR_TAG_READY; |
| 1626 | } | 1635 | } |
| 1627 | 1636 | ||
| 1637 | static void prepare_read_keepalive_ack(struct ceph_connection *con) | ||
| 1638 | { | ||
| 1639 | dout("prepare_read_keepalive_ack %p\n", con); | ||
| 1640 | con->in_base_pos = 0; | ||
| 1641 | } | ||
| 1642 | |||
| 1628 | /* | 1643 | /* |
| 1629 | * Prepare to read a message. | 1644 | * Prepare to read a message. |
| 1630 | */ | 1645 | */ |
| @@ -2322,13 +2337,6 @@ static int read_partial_message(struct ceph_connection *con) | |||
| 2322 | return ret; | 2337 | return ret; |
| 2323 | 2338 | ||
| 2324 | BUG_ON(!con->in_msg ^ skip); | 2339 | BUG_ON(!con->in_msg ^ skip); |
| 2325 | if (con->in_msg && data_len > con->in_msg->data_length) { | ||
| 2326 | pr_warn("%s skipping long message (%u > %zd)\n", | ||
| 2327 | __func__, data_len, con->in_msg->data_length); | ||
| 2328 | ceph_msg_put(con->in_msg); | ||
| 2329 | con->in_msg = NULL; | ||
| 2330 | skip = 1; | ||
| 2331 | } | ||
| 2332 | if (skip) { | 2340 | if (skip) { |
| 2333 | /* skip this message */ | 2341 | /* skip this message */ |
| 2334 | dout("alloc_msg said skip message\n"); | 2342 | dout("alloc_msg said skip message\n"); |
| @@ -2457,6 +2465,17 @@ static void process_message(struct ceph_connection *con) | |||
| 2457 | mutex_lock(&con->mutex); | 2465 | mutex_lock(&con->mutex); |
| 2458 | } | 2466 | } |
| 2459 | 2467 | ||
| 2468 | static int read_keepalive_ack(struct ceph_connection *con) | ||
| 2469 | { | ||
| 2470 | struct ceph_timespec ceph_ts; | ||
| 2471 | size_t size = sizeof(ceph_ts); | ||
| 2472 | int ret = read_partial(con, size, size, &ceph_ts); | ||
| 2473 | if (ret <= 0) | ||
| 2474 | return ret; | ||
| 2475 | ceph_decode_timespec(&con->last_keepalive_ack, &ceph_ts); | ||
| 2476 | prepare_read_tag(con); | ||
| 2477 | return 1; | ||
| 2478 | } | ||
| 2460 | 2479 | ||
| 2461 | /* | 2480 | /* |
| 2462 | * Write something to the socket. Called in a worker thread when the | 2481 | * Write something to the socket. Called in a worker thread when the |
| @@ -2526,6 +2545,10 @@ more_kvec: | |||
| 2526 | 2545 | ||
| 2527 | do_next: | 2546 | do_next: |
| 2528 | if (con->state == CON_STATE_OPEN) { | 2547 | if (con->state == CON_STATE_OPEN) { |
| 2548 | if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) { | ||
| 2549 | prepare_write_keepalive(con); | ||
| 2550 | goto more; | ||
| 2551 | } | ||
| 2529 | /* is anything else pending? */ | 2552 | /* is anything else pending? */ |
| 2530 | if (!list_empty(&con->out_queue)) { | 2553 | if (!list_empty(&con->out_queue)) { |
| 2531 | prepare_write_message(con); | 2554 | prepare_write_message(con); |
| @@ -2535,10 +2558,6 @@ do_next: | |||
| 2535 | prepare_write_ack(con); | 2558 | prepare_write_ack(con); |
| 2536 | goto more; | 2559 | goto more; |
| 2537 | } | 2560 | } |
| 2538 | if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) { | ||
| 2539 | prepare_write_keepalive(con); | ||
| 2540 | goto more; | ||
| 2541 | } | ||
| 2542 | } | 2561 | } |
| 2543 | 2562 | ||
| 2544 | /* Nothing to do! */ | 2563 | /* Nothing to do! */ |
| @@ -2641,6 +2660,9 @@ more: | |||
| 2641 | case CEPH_MSGR_TAG_ACK: | 2660 | case CEPH_MSGR_TAG_ACK: |
| 2642 | prepare_read_ack(con); | 2661 | prepare_read_ack(con); |
| 2643 | break; | 2662 | break; |
| 2663 | case CEPH_MSGR_TAG_KEEPALIVE2_ACK: | ||
| 2664 | prepare_read_keepalive_ack(con); | ||
| 2665 | break; | ||
| 2644 | case CEPH_MSGR_TAG_CLOSE: | 2666 | case CEPH_MSGR_TAG_CLOSE: |
| 2645 | con_close_socket(con); | 2667 | con_close_socket(con); |
| 2646 | con->state = CON_STATE_CLOSED; | 2668 | con->state = CON_STATE_CLOSED; |
| @@ -2684,6 +2706,12 @@ more: | |||
| 2684 | process_ack(con); | 2706 | process_ack(con); |
| 2685 | goto more; | 2707 | goto more; |
| 2686 | } | 2708 | } |
| 2709 | if (con->in_tag == CEPH_MSGR_TAG_KEEPALIVE2_ACK) { | ||
| 2710 | ret = read_keepalive_ack(con); | ||
| 2711 | if (ret <= 0) | ||
| 2712 | goto out; | ||
| 2713 | goto more; | ||
| 2714 | } | ||
| 2687 | 2715 | ||
| 2688 | out: | 2716 | out: |
| 2689 | dout("try_read done on %p ret %d\n", con, ret); | 2717 | dout("try_read done on %p ret %d\n", con, ret); |
| @@ -2799,7 +2827,7 @@ static void con_fault_finish(struct ceph_connection *con) | |||
| 2799 | /* | 2827 | /* |
| 2800 | * Do some work on a connection. Drop a connection ref when we're done. | 2828 | * Do some work on a connection. Drop a connection ref when we're done. |
| 2801 | */ | 2829 | */ |
| 2802 | static void con_work(struct work_struct *work) | 2830 | static void ceph_con_workfn(struct work_struct *work) |
| 2803 | { | 2831 | { |
| 2804 | struct ceph_connection *con = container_of(work, struct ceph_connection, | 2832 | struct ceph_connection *con = container_of(work, struct ceph_connection, |
| 2805 | work.work); | 2833 | work.work); |
| @@ -3101,6 +3129,20 @@ void ceph_con_keepalive(struct ceph_connection *con) | |||
| 3101 | } | 3129 | } |
| 3102 | EXPORT_SYMBOL(ceph_con_keepalive); | 3130 | EXPORT_SYMBOL(ceph_con_keepalive); |
| 3103 | 3131 | ||
| 3132 | bool ceph_con_keepalive_expired(struct ceph_connection *con, | ||
| 3133 | unsigned long interval) | ||
| 3134 | { | ||
| 3135 | if (interval > 0 && | ||
| 3136 | (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2)) { | ||
| 3137 | struct timespec now = CURRENT_TIME; | ||
| 3138 | struct timespec ts; | ||
| 3139 | jiffies_to_timespec(interval, &ts); | ||
| 3140 | ts = timespec_add(con->last_keepalive_ack, ts); | ||
| 3141 | return timespec_compare(&now, &ts) >= 0; | ||
| 3142 | } | ||
| 3143 | return false; | ||
| 3144 | } | ||
| 3145 | |||
| 3104 | static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type) | 3146 | static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type) |
| 3105 | { | 3147 | { |
| 3106 | struct ceph_msg_data *data; | 3148 | struct ceph_msg_data *data; |
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 9d6ff1215928..edda01626a45 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c | |||
| @@ -149,6 +149,10 @@ static int __open_session(struct ceph_mon_client *monc) | |||
| 149 | CEPH_ENTITY_TYPE_MON, monc->cur_mon, | 149 | CEPH_ENTITY_TYPE_MON, monc->cur_mon, |
| 150 | &monc->monmap->mon_inst[monc->cur_mon].addr); | 150 | &monc->monmap->mon_inst[monc->cur_mon].addr); |
| 151 | 151 | ||
| 152 | /* send an initial keepalive to ensure our timestamp is | ||
| 153 | * valid by the time we are in an OPENED state */ | ||
| 154 | ceph_con_keepalive(&monc->con); | ||
| 155 | |||
| 152 | /* initiate authentication handshake */ | 156 | /* initiate authentication handshake */ |
| 153 | ret = ceph_auth_build_hello(monc->auth, | 157 | ret = ceph_auth_build_hello(monc->auth, |
| 154 | monc->m_auth->front.iov_base, | 158 | monc->m_auth->front.iov_base, |
| @@ -170,14 +174,19 @@ static bool __sub_expired(struct ceph_mon_client *monc) | |||
| 170 | */ | 174 | */ |
| 171 | static void __schedule_delayed(struct ceph_mon_client *monc) | 175 | static void __schedule_delayed(struct ceph_mon_client *monc) |
| 172 | { | 176 | { |
| 173 | unsigned int delay; | 177 | struct ceph_options *opt = monc->client->options; |
| 178 | unsigned long delay; | ||
| 174 | 179 | ||
| 175 | if (monc->cur_mon < 0 || __sub_expired(monc)) | 180 | if (monc->cur_mon < 0 || __sub_expired(monc)) { |
| 176 | delay = 10 * HZ; | 181 | delay = 10 * HZ; |
| 177 | else | 182 | } else { |
| 178 | delay = 20 * HZ; | 183 | delay = 20 * HZ; |
| 179 | dout("__schedule_delayed after %u\n", delay); | 184 | if (opt->monc_ping_timeout > 0) |
| 180 | schedule_delayed_work(&monc->delayed_work, delay); | 185 | delay = min(delay, opt->monc_ping_timeout / 3); |
| 186 | } | ||
| 187 | dout("__schedule_delayed after %lu\n", delay); | ||
| 188 | schedule_delayed_work(&monc->delayed_work, | ||
| 189 | round_jiffies_relative(delay)); | ||
| 181 | } | 190 | } |
| 182 | 191 | ||
| 183 | /* | 192 | /* |
| @@ -743,11 +752,23 @@ static void delayed_work(struct work_struct *work) | |||
| 743 | __close_session(monc); | 752 | __close_session(monc); |
| 744 | __open_session(monc); /* continue hunting */ | 753 | __open_session(monc); /* continue hunting */ |
| 745 | } else { | 754 | } else { |
| 746 | ceph_con_keepalive(&monc->con); | 755 | struct ceph_options *opt = monc->client->options; |
| 756 | int is_auth = ceph_auth_is_authenticated(monc->auth); | ||
| 757 | if (ceph_con_keepalive_expired(&monc->con, | ||
| 758 | opt->monc_ping_timeout)) { | ||
| 759 | dout("monc keepalive timeout\n"); | ||
| 760 | is_auth = 0; | ||
| 761 | __close_session(monc); | ||
| 762 | monc->hunting = true; | ||
| 763 | __open_session(monc); | ||
| 764 | } | ||
| 747 | 765 | ||
| 748 | __validate_auth(monc); | 766 | if (!monc->hunting) { |
| 767 | ceph_con_keepalive(&monc->con); | ||
| 768 | __validate_auth(monc); | ||
| 769 | } | ||
| 749 | 770 | ||
| 750 | if (ceph_auth_is_authenticated(monc->auth)) | 771 | if (is_auth) |
| 751 | __send_subscribe(monc); | 772 | __send_subscribe(monc); |
| 752 | } | 773 | } |
| 753 | __schedule_delayed(monc); | 774 | __schedule_delayed(monc); |
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 50033677c0fa..80b94e37c94a 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c | |||
| @@ -2817,8 +2817,9 @@ out: | |||
| 2817 | } | 2817 | } |
| 2818 | 2818 | ||
| 2819 | /* | 2819 | /* |
| 2820 | * lookup and return message for incoming reply. set up reply message | 2820 | * Lookup and return message for incoming reply. Don't try to do |
| 2821 | * pages. | 2821 | * anything about a larger than preallocated data portion of the |
| 2822 | * message at the moment - for now, just skip the message. | ||
| 2822 | */ | 2823 | */ |
| 2823 | static struct ceph_msg *get_reply(struct ceph_connection *con, | 2824 | static struct ceph_msg *get_reply(struct ceph_connection *con, |
| 2824 | struct ceph_msg_header *hdr, | 2825 | struct ceph_msg_header *hdr, |
| @@ -2836,10 +2837,10 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
| 2836 | mutex_lock(&osdc->request_mutex); | 2837 | mutex_lock(&osdc->request_mutex); |
| 2837 | req = __lookup_request(osdc, tid); | 2838 | req = __lookup_request(osdc, tid); |
| 2838 | if (!req) { | 2839 | if (!req) { |
| 2839 | *skip = 1; | 2840 | pr_warn("%s osd%d tid %llu unknown, skipping\n", |
| 2841 | __func__, osd->o_osd, tid); | ||
| 2840 | m = NULL; | 2842 | m = NULL; |
| 2841 | dout("get_reply unknown tid %llu from osd%d\n", tid, | 2843 | *skip = 1; |
| 2842 | osd->o_osd); | ||
| 2843 | goto out; | 2844 | goto out; |
| 2844 | } | 2845 | } |
| 2845 | 2846 | ||
| @@ -2849,10 +2850,9 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
| 2849 | ceph_msg_revoke_incoming(req->r_reply); | 2850 | ceph_msg_revoke_incoming(req->r_reply); |
| 2850 | 2851 | ||
| 2851 | if (front_len > req->r_reply->front_alloc_len) { | 2852 | if (front_len > req->r_reply->front_alloc_len) { |
| 2852 | pr_warn("get_reply front %d > preallocated %d (%u#%llu)\n", | 2853 | pr_warn("%s osd%d tid %llu front %d > preallocated %d\n", |
| 2853 | front_len, req->r_reply->front_alloc_len, | 2854 | __func__, osd->o_osd, req->r_tid, front_len, |
| 2854 | (unsigned int)con->peer_name.type, | 2855 | req->r_reply->front_alloc_len); |
| 2855 | le64_to_cpu(con->peer_name.num)); | ||
| 2856 | m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front_len, GFP_NOFS, | 2856 | m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front_len, GFP_NOFS, |
| 2857 | false); | 2857 | false); |
| 2858 | if (!m) | 2858 | if (!m) |
| @@ -2860,37 +2860,22 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
| 2860 | ceph_msg_put(req->r_reply); | 2860 | ceph_msg_put(req->r_reply); |
| 2861 | req->r_reply = m; | 2861 | req->r_reply = m; |
| 2862 | } | 2862 | } |
| 2863 | m = ceph_msg_get(req->r_reply); | ||
| 2864 | |||
| 2865 | if (data_len > 0) { | ||
| 2866 | struct ceph_osd_data *osd_data; | ||
| 2867 | 2863 | ||
| 2868 | /* | 2864 | if (data_len > req->r_reply->data_length) { |
| 2869 | * XXX This is assuming there is only one op containing | 2865 | pr_warn("%s osd%d tid %llu data %d > preallocated %zu, skipping\n", |
| 2870 | * XXX page data. Probably OK for reads, but this | 2866 | __func__, osd->o_osd, req->r_tid, data_len, |
| 2871 | * XXX ought to be done more generally. | 2867 | req->r_reply->data_length); |
| 2872 | */ | 2868 | m = NULL; |
| 2873 | osd_data = osd_req_op_extent_osd_data(req, 0); | 2869 | *skip = 1; |
| 2874 | if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) { | 2870 | goto out; |
| 2875 | if (osd_data->pages && | ||
| 2876 | unlikely(osd_data->length < data_len)) { | ||
| 2877 | |||
| 2878 | pr_warn("tid %lld reply has %d bytes we had only %llu bytes ready\n", | ||
| 2879 | tid, data_len, osd_data->length); | ||
| 2880 | *skip = 1; | ||
| 2881 | ceph_msg_put(m); | ||
| 2882 | m = NULL; | ||
| 2883 | goto out; | ||
| 2884 | } | ||
| 2885 | } | ||
| 2886 | } | 2871 | } |
| 2887 | *skip = 0; | 2872 | |
| 2873 | m = ceph_msg_get(req->r_reply); | ||
| 2888 | dout("get_reply tid %lld %p\n", tid, m); | 2874 | dout("get_reply tid %lld %p\n", tid, m); |
| 2889 | 2875 | ||
| 2890 | out: | 2876 | out: |
| 2891 | mutex_unlock(&osdc->request_mutex); | 2877 | mutex_unlock(&osdc->request_mutex); |
| 2892 | return m; | 2878 | return m; |
| 2893 | |||
| 2894 | } | 2879 | } |
| 2895 | 2880 | ||
| 2896 | static struct ceph_msg *alloc_msg(struct ceph_connection *con, | 2881 | static struct ceph_msg *alloc_msg(struct ceph_connection *con, |
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 4a3125836b64..7d8f581d9f1f 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c | |||
| @@ -1300,7 +1300,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
| 1300 | ceph_decode_addr(&addr); | 1300 | ceph_decode_addr(&addr); |
| 1301 | pr_info("osd%d up\n", osd); | 1301 | pr_info("osd%d up\n", osd); |
| 1302 | BUG_ON(osd >= map->max_osd); | 1302 | BUG_ON(osd >= map->max_osd); |
| 1303 | map->osd_state[osd] |= CEPH_OSD_UP; | 1303 | map->osd_state[osd] |= CEPH_OSD_UP | CEPH_OSD_EXISTS; |
| 1304 | map->osd_addr[osd] = addr; | 1304 | map->osd_addr[osd] = addr; |
| 1305 | } | 1305 | } |
| 1306 | 1306 | ||
