diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-11 15:33:03 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-11 15:33:03 -0400 |
commit | e013f74b60bbd37ee8c3a55214eb351ea3101c15 (patch) | |
tree | 096b59f550dea6df9347edf97b872dc75a79f653 | |
parent | 01cab5549c3e9a0fe7248fc5ad0fd79361cc0d39 (diff) | |
parent | 438386853d4c0c48fe73bf05a7d61c70ca5a3bfb (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph update from Sage Weil:
"There are a few fixes for snapshot behavior with CephFS and support
for the new keepalive protocol from Zheng, a libceph fix that affects
both RBD and CephFS, a few bug fixes and cleanups for RBD from Ilya,
and several small fixes and cleanups from Jianpeng and others"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
ceph: improve readahead for file holes
ceph: get inode size for each append write
libceph: check data_len in ->alloc_msg()
libceph: use keepalive2 to verify the mon session is alive
rbd: plug rbd_dev->header.object_prefix memory leak
rbd: fix double free on rbd_dev->header_name
libceph: set 'exists' flag for newly up osd
ceph: cleanup use of ceph_msg_get
ceph: no need to get parent inode in ceph_open
ceph: remove the useless judgement
ceph: remove redundant test of head->safe and silence static analysis warnings
ceph: fix queuing inode to mdsdir's snaprealm
libceph: rename con_work() to ceph_con_workfn()
libceph: Avoid holding the zero page on ceph_msgr_slab_init errors
libceph: remove the unused macro AES_KEY_SIZE
ceph: invalidate dirty pages after forced umount
ceph: EIO all operations after forced umount
-rw-r--r-- | drivers/block/rbd.c | 6 | ||||
-rw-r--r-- | fs/ceph/addr.c | 6 | ||||
-rw-r--r-- | fs/ceph/caps.c | 8 | ||||
-rw-r--r-- | fs/ceph/file.c | 14 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 59 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 1 | ||||
-rw-r--r-- | fs/ceph/snap.c | 7 | ||||
-rw-r--r-- | fs/ceph/super.c | 1 | ||||
-rw-r--r-- | include/linux/ceph/libceph.h | 2 | ||||
-rw-r--r-- | include/linux/ceph/messenger.h | 4 | ||||
-rw-r--r-- | include/linux/ceph/msgr.h | 4 | ||||
-rw-r--r-- | net/ceph/ceph_common.c | 1 | ||||
-rw-r--r-- | net/ceph/crypto.c | 4 | ||||
-rw-r--r-- | net/ceph/messenger.c | 82 | ||||
-rw-r--r-- | net/ceph/mon_client.c | 37 | ||||
-rw-r--r-- | net/ceph/osd_client.c | 51 | ||||
-rw-r--r-- | net/ceph/osdmap.c | 2 |
17 files changed, 191 insertions, 98 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 698f761037ce..d93a0372b37b 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c | |||
@@ -4673,7 +4673,10 @@ static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev) | |||
4673 | } | 4673 | } |
4674 | 4674 | ||
4675 | ret = rbd_dev_v2_snap_context(rbd_dev); | 4675 | ret = rbd_dev_v2_snap_context(rbd_dev); |
4676 | dout("rbd_dev_v2_snap_context returned %d\n", ret); | 4676 | if (ret && first_time) { |
4677 | kfree(rbd_dev->header.object_prefix); | ||
4678 | rbd_dev->header.object_prefix = NULL; | ||
4679 | } | ||
4677 | 4680 | ||
4678 | return ret; | 4681 | return ret; |
4679 | } | 4682 | } |
@@ -5154,7 +5157,6 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev) | |||
5154 | out_err: | 5157 | out_err: |
5155 | if (parent) { | 5158 | if (parent) { |
5156 | rbd_dev_unparent(rbd_dev); | 5159 | rbd_dev_unparent(rbd_dev); |
5157 | kfree(rbd_dev->header_name); | ||
5158 | rbd_dev_destroy(parent); | 5160 | rbd_dev_destroy(parent); |
5159 | } else { | 5161 | } else { |
5160 | rbd_put_client(rbdc); | 5162 | rbd_put_client(rbdc); |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index a268abfe60ac..9d23e788d1df 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -276,7 +276,7 @@ static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg) | |||
276 | for (i = 0; i < num_pages; i++) { | 276 | for (i = 0; i < num_pages; i++) { |
277 | struct page *page = osd_data->pages[i]; | 277 | struct page *page = osd_data->pages[i]; |
278 | 278 | ||
279 | if (rc < 0) | 279 | if (rc < 0 && rc != ENOENT) |
280 | goto unlock; | 280 | goto unlock; |
281 | if (bytes < (int)PAGE_CACHE_SIZE) { | 281 | if (bytes < (int)PAGE_CACHE_SIZE) { |
282 | /* zero (remainder of) page */ | 282 | /* zero (remainder of) page */ |
@@ -717,8 +717,10 @@ static int ceph_writepages_start(struct address_space *mapping, | |||
717 | wbc->sync_mode == WB_SYNC_NONE ? "NONE" : | 717 | wbc->sync_mode == WB_SYNC_NONE ? "NONE" : |
718 | (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); | 718 | (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); |
719 | 719 | ||
720 | if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { | 720 | if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { |
721 | pr_warn("writepage_start %p on forced umount\n", inode); | 721 | pr_warn("writepage_start %p on forced umount\n", inode); |
722 | truncate_pagecache(inode, 0); | ||
723 | mapping_set_error(mapping, -EIO); | ||
722 | return -EIO; /* we're in a forced umount, don't write! */ | 724 | return -EIO; /* we're in a forced umount, don't write! */ |
723 | } | 725 | } |
724 | if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize) | 726 | if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize) |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index ddd5e9471290..27b566874bc1 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -2413,6 +2413,14 @@ again: | |||
2413 | goto out_unlock; | 2413 | goto out_unlock; |
2414 | } | 2414 | } |
2415 | 2415 | ||
2416 | if (!__ceph_is_any_caps(ci) && | ||
2417 | ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { | ||
2418 | dout("get_cap_refs %p forced umount\n", inode); | ||
2419 | *err = -EIO; | ||
2420 | ret = 1; | ||
2421 | goto out_unlock; | ||
2422 | } | ||
2423 | |||
2416 | dout("get_cap_refs %p have %s needed %s\n", inode, | 2424 | dout("get_cap_refs %p have %s needed %s\n", inode, |
2417 | ceph_cap_string(have), ceph_cap_string(need)); | 2425 | ceph_cap_string(have), ceph_cap_string(need)); |
2418 | } | 2426 | } |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 8b79d87eaf46..0c62868b5c56 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -136,7 +136,6 @@ int ceph_open(struct inode *inode, struct file *file) | |||
136 | struct ceph_mds_client *mdsc = fsc->mdsc; | 136 | struct ceph_mds_client *mdsc = fsc->mdsc; |
137 | struct ceph_mds_request *req; | 137 | struct ceph_mds_request *req; |
138 | struct ceph_file_info *cf = file->private_data; | 138 | struct ceph_file_info *cf = file->private_data; |
139 | struct inode *parent_inode = NULL; | ||
140 | int err; | 139 | int err; |
141 | int flags, fmode, wanted; | 140 | int flags, fmode, wanted; |
142 | 141 | ||
@@ -210,10 +209,7 @@ int ceph_open(struct inode *inode, struct file *file) | |||
210 | ihold(inode); | 209 | ihold(inode); |
211 | 210 | ||
212 | req->r_num_caps = 1; | 211 | req->r_num_caps = 1; |
213 | if (flags & O_CREAT) | 212 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
214 | parent_inode = ceph_get_dentry_parent_inode(file->f_path.dentry); | ||
215 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); | ||
216 | iput(parent_inode); | ||
217 | if (!err) | 213 | if (!err) |
218 | err = ceph_init_file(inode, file, req->r_fmode); | 214 | err = ceph_init_file(inode, file, req->r_fmode); |
219 | ceph_mdsc_put_request(req); | 215 | ceph_mdsc_put_request(req); |
@@ -279,7 +275,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, | |||
279 | if (err) | 275 | if (err) |
280 | goto out_req; | 276 | goto out_req; |
281 | 277 | ||
282 | if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry) | 278 | if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry) |
283 | err = ceph_handle_notrace_create(dir, dentry); | 279 | err = ceph_handle_notrace_create(dir, dentry); |
284 | 280 | ||
285 | if (d_unhashed(dentry)) { | 281 | if (d_unhashed(dentry)) { |
@@ -956,6 +952,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) | |||
956 | /* We can write back this queue in page reclaim */ | 952 | /* We can write back this queue in page reclaim */ |
957 | current->backing_dev_info = inode_to_bdi(inode); | 953 | current->backing_dev_info = inode_to_bdi(inode); |
958 | 954 | ||
955 | if (iocb->ki_flags & IOCB_APPEND) { | ||
956 | err = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false); | ||
957 | if (err < 0) | ||
958 | goto out; | ||
959 | } | ||
960 | |||
959 | err = generic_write_checks(iocb, from); | 961 | err = generic_write_checks(iocb, from); |
960 | if (err <= 0) | 962 | if (err <= 0) |
961 | goto out; | 963 | goto out; |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 6aa07af67603..51cb02da75d9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -2107,7 +2107,6 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, | |||
2107 | msg = create_request_message(mdsc, req, mds, drop_cap_releases); | 2107 | msg = create_request_message(mdsc, req, mds, drop_cap_releases); |
2108 | if (IS_ERR(msg)) { | 2108 | if (IS_ERR(msg)) { |
2109 | req->r_err = PTR_ERR(msg); | 2109 | req->r_err = PTR_ERR(msg); |
2110 | complete_request(mdsc, req); | ||
2111 | return PTR_ERR(msg); | 2110 | return PTR_ERR(msg); |
2112 | } | 2111 | } |
2113 | req->r_request = msg; | 2112 | req->r_request = msg; |
@@ -2135,7 +2134,7 @@ static int __do_request(struct ceph_mds_client *mdsc, | |||
2135 | { | 2134 | { |
2136 | struct ceph_mds_session *session = NULL; | 2135 | struct ceph_mds_session *session = NULL; |
2137 | int mds = -1; | 2136 | int mds = -1; |
2138 | int err = -EAGAIN; | 2137 | int err = 0; |
2139 | 2138 | ||
2140 | if (req->r_err || req->r_got_result) { | 2139 | if (req->r_err || req->r_got_result) { |
2141 | if (req->r_aborted) | 2140 | if (req->r_aborted) |
@@ -2149,6 +2148,11 @@ static int __do_request(struct ceph_mds_client *mdsc, | |||
2149 | err = -EIO; | 2148 | err = -EIO; |
2150 | goto finish; | 2149 | goto finish; |
2151 | } | 2150 | } |
2151 | if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { | ||
2152 | dout("do_request forced umount\n"); | ||
2153 | err = -EIO; | ||
2154 | goto finish; | ||
2155 | } | ||
2152 | 2156 | ||
2153 | put_request_session(req); | 2157 | put_request_session(req); |
2154 | 2158 | ||
@@ -2196,13 +2200,15 @@ static int __do_request(struct ceph_mds_client *mdsc, | |||
2196 | 2200 | ||
2197 | out_session: | 2201 | out_session: |
2198 | ceph_put_mds_session(session); | 2202 | ceph_put_mds_session(session); |
2203 | finish: | ||
2204 | if (err) { | ||
2205 | dout("__do_request early error %d\n", err); | ||
2206 | req->r_err = err; | ||
2207 | complete_request(mdsc, req); | ||
2208 | __unregister_request(mdsc, req); | ||
2209 | } | ||
2199 | out: | 2210 | out: |
2200 | return err; | 2211 | return err; |
2201 | |||
2202 | finish: | ||
2203 | req->r_err = err; | ||
2204 | complete_request(mdsc, req); | ||
2205 | goto out; | ||
2206 | } | 2212 | } |
2207 | 2213 | ||
2208 | /* | 2214 | /* |
@@ -2289,8 +2295,6 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, | |||
2289 | 2295 | ||
2290 | if (req->r_err) { | 2296 | if (req->r_err) { |
2291 | err = req->r_err; | 2297 | err = req->r_err; |
2292 | __unregister_request(mdsc, req); | ||
2293 | dout("do_request early error %d\n", err); | ||
2294 | goto out; | 2298 | goto out; |
2295 | } | 2299 | } |
2296 | 2300 | ||
@@ -2411,7 +2415,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
2411 | mutex_unlock(&mdsc->mutex); | 2415 | mutex_unlock(&mdsc->mutex); |
2412 | goto out; | 2416 | goto out; |
2413 | } | 2417 | } |
2414 | if (req->r_got_safe && !head->safe) { | 2418 | if (req->r_got_safe) { |
2415 | pr_warn("got unsafe after safe on %llu from mds%d\n", | 2419 | pr_warn("got unsafe after safe on %llu from mds%d\n", |
2416 | tid, mds); | 2420 | tid, mds); |
2417 | mutex_unlock(&mdsc->mutex); | 2421 | mutex_unlock(&mdsc->mutex); |
@@ -2520,8 +2524,7 @@ out_err: | |||
2520 | if (err) { | 2524 | if (err) { |
2521 | req->r_err = err; | 2525 | req->r_err = err; |
2522 | } else { | 2526 | } else { |
2523 | req->r_reply = msg; | 2527 | req->r_reply = ceph_msg_get(msg); |
2524 | ceph_msg_get(msg); | ||
2525 | req->r_got_result = true; | 2528 | req->r_got_result = true; |
2526 | } | 2529 | } |
2527 | } else { | 2530 | } else { |
@@ -3555,7 +3558,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) | |||
3555 | { | 3558 | { |
3556 | u64 want_tid, want_flush, want_snap; | 3559 | u64 want_tid, want_flush, want_snap; |
3557 | 3560 | ||
3558 | if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) | 3561 | if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) |
3559 | return; | 3562 | return; |
3560 | 3563 | ||
3561 | dout("sync\n"); | 3564 | dout("sync\n"); |
@@ -3584,7 +3587,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) | |||
3584 | */ | 3587 | */ |
3585 | static bool done_closing_sessions(struct ceph_mds_client *mdsc) | 3588 | static bool done_closing_sessions(struct ceph_mds_client *mdsc) |
3586 | { | 3589 | { |
3587 | if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) | 3590 | if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) |
3588 | return true; | 3591 | return true; |
3589 | return atomic_read(&mdsc->num_sessions) == 0; | 3592 | return atomic_read(&mdsc->num_sessions) == 0; |
3590 | } | 3593 | } |
@@ -3643,6 +3646,34 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) | |||
3643 | dout("stopped\n"); | 3646 | dout("stopped\n"); |
3644 | } | 3647 | } |
3645 | 3648 | ||
3649 | void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc) | ||
3650 | { | ||
3651 | struct ceph_mds_session *session; | ||
3652 | int mds; | ||
3653 | |||
3654 | dout("force umount\n"); | ||
3655 | |||
3656 | mutex_lock(&mdsc->mutex); | ||
3657 | for (mds = 0; mds < mdsc->max_sessions; mds++) { | ||
3658 | session = __ceph_lookup_mds_session(mdsc, mds); | ||
3659 | if (!session) | ||
3660 | continue; | ||
3661 | mutex_unlock(&mdsc->mutex); | ||
3662 | mutex_lock(&session->s_mutex); | ||
3663 | __close_session(mdsc, session); | ||
3664 | if (session->s_state == CEPH_MDS_SESSION_CLOSING) { | ||
3665 | cleanup_session_requests(mdsc, session); | ||
3666 | remove_session_caps(session); | ||
3667 | } | ||
3668 | mutex_unlock(&session->s_mutex); | ||
3669 | ceph_put_mds_session(session); | ||
3670 | mutex_lock(&mdsc->mutex); | ||
3671 | kick_requests(mdsc, mds); | ||
3672 | } | ||
3673 | __wake_requests(mdsc, &mdsc->waiting_for_map); | ||
3674 | mutex_unlock(&mdsc->mutex); | ||
3675 | } | ||
3676 | |||
3646 | static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) | 3677 | static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) |
3647 | { | 3678 | { |
3648 | dout("stop\n"); | 3679 | dout("stop\n"); |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 762757e6cebf..f575eafe2261 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -366,6 +366,7 @@ extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc, | |||
366 | 366 | ||
367 | extern int ceph_mdsc_init(struct ceph_fs_client *fsc); | 367 | extern int ceph_mdsc_init(struct ceph_fs_client *fsc); |
368 | extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc); | 368 | extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc); |
369 | extern void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc); | ||
369 | extern void ceph_mdsc_destroy(struct ceph_fs_client *fsc); | 370 | extern void ceph_mdsc_destroy(struct ceph_fs_client *fsc); |
370 | 371 | ||
371 | extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc); | 372 | extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc); |
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 233d906aec02..4aa7122a8d38 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c | |||
@@ -338,12 +338,6 @@ static int build_snap_context(struct ceph_snap_realm *realm) | |||
338 | return 0; | 338 | return 0; |
339 | } | 339 | } |
340 | 340 | ||
341 | if (num == 0 && realm->seq == ceph_empty_snapc->seq) { | ||
342 | ceph_get_snap_context(ceph_empty_snapc); | ||
343 | snapc = ceph_empty_snapc; | ||
344 | goto done; | ||
345 | } | ||
346 | |||
347 | /* alloc new snap context */ | 341 | /* alloc new snap context */ |
348 | err = -ENOMEM; | 342 | err = -ENOMEM; |
349 | if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64)) | 343 | if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64)) |
@@ -381,7 +375,6 @@ static int build_snap_context(struct ceph_snap_realm *realm) | |||
381 | realm->ino, realm, snapc, snapc->seq, | 375 | realm->ino, realm, snapc, snapc->seq, |
382 | (unsigned int) snapc->num_snaps); | 376 | (unsigned int) snapc->num_snaps); |
383 | 377 | ||
384 | done: | ||
385 | ceph_put_snap_context(realm->cached_context); | 378 | ceph_put_snap_context(realm->cached_context); |
386 | realm->cached_context = snapc; | 379 | realm->cached_context = snapc; |
387 | return 0; | 380 | return 0; |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 7b6bfcbf801c..f446afada328 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -708,6 +708,7 @@ static void ceph_umount_begin(struct super_block *sb) | |||
708 | if (!fsc) | 708 | if (!fsc) |
709 | return; | 709 | return; |
710 | fsc->mount_state = CEPH_MOUNT_SHUTDOWN; | 710 | fsc->mount_state = CEPH_MOUNT_SHUTDOWN; |
711 | ceph_mdsc_force_umount(fsc->mdsc); | ||
711 | return; | 712 | return; |
712 | } | 713 | } |
713 | 714 | ||
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 9ebee53d3bf5..397c5cd09794 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h | |||
@@ -46,6 +46,7 @@ struct ceph_options { | |||
46 | unsigned long mount_timeout; /* jiffies */ | 46 | unsigned long mount_timeout; /* jiffies */ |
47 | unsigned long osd_idle_ttl; /* jiffies */ | 47 | unsigned long osd_idle_ttl; /* jiffies */ |
48 | unsigned long osd_keepalive_timeout; /* jiffies */ | 48 | unsigned long osd_keepalive_timeout; /* jiffies */ |
49 | unsigned long monc_ping_timeout; /* jiffies */ | ||
49 | 50 | ||
50 | /* | 51 | /* |
51 | * any type that can't be simply compared or doesn't need | 52 | * any type that can't be simply compared or doesn't need |
@@ -66,6 +67,7 @@ struct ceph_options { | |||
66 | #define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) | 67 | #define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) |
67 | #define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) | 68 | #define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) |
68 | #define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) | 69 | #define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) |
70 | #define CEPH_MONC_PING_TIMEOUT_DEFAULT msecs_to_jiffies(30 * 1000) | ||
69 | 71 | ||
70 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) | 72 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) |
71 | #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) | 73 | #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) |
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 37753278987a..7e1252e97a30 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h | |||
@@ -248,6 +248,8 @@ struct ceph_connection { | |||
248 | int in_base_pos; /* bytes read */ | 248 | int in_base_pos; /* bytes read */ |
249 | __le64 in_temp_ack; /* for reading an ack */ | 249 | __le64 in_temp_ack; /* for reading an ack */ |
250 | 250 | ||
251 | struct timespec last_keepalive_ack; | ||
252 | |||
251 | struct delayed_work work; /* send|recv work */ | 253 | struct delayed_work work; /* send|recv work */ |
252 | unsigned long delay; /* current delay interval */ | 254 | unsigned long delay; /* current delay interval */ |
253 | }; | 255 | }; |
@@ -285,6 +287,8 @@ extern void ceph_msg_revoke(struct ceph_msg *msg); | |||
285 | extern void ceph_msg_revoke_incoming(struct ceph_msg *msg); | 287 | extern void ceph_msg_revoke_incoming(struct ceph_msg *msg); |
286 | 288 | ||
287 | extern void ceph_con_keepalive(struct ceph_connection *con); | 289 | extern void ceph_con_keepalive(struct ceph_connection *con); |
290 | extern bool ceph_con_keepalive_expired(struct ceph_connection *con, | ||
291 | unsigned long interval); | ||
288 | 292 | ||
289 | extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, | 293 | extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, |
290 | size_t length, size_t alignment); | 294 | size_t length, size_t alignment); |
diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h index 1c1887206ffa..0fe2656ac415 100644 --- a/include/linux/ceph/msgr.h +++ b/include/linux/ceph/msgr.h | |||
@@ -84,10 +84,12 @@ struct ceph_entity_inst { | |||
84 | #define CEPH_MSGR_TAG_MSG 7 /* message */ | 84 | #define CEPH_MSGR_TAG_MSG 7 /* message */ |
85 | #define CEPH_MSGR_TAG_ACK 8 /* message ack */ | 85 | #define CEPH_MSGR_TAG_ACK 8 /* message ack */ |
86 | #define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! */ | 86 | #define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! */ |
87 | #define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */ | 87 | #define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */ |
88 | #define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */ | 88 | #define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */ |
89 | #define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */ | 89 | #define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */ |
90 | #define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */ | 90 | #define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */ |
91 | #define CEPH_MSGR_TAG_KEEPALIVE2 14 /* keepalive2 byte + ceph_timespec */ | ||
92 | #define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */ | ||
91 | 93 | ||
92 | 94 | ||
93 | /* | 95 | /* |
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 69a4d30a9ccf..54a00d66509e 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c | |||
@@ -357,6 +357,7 @@ ceph_parse_options(char *options, const char *dev_name, | |||
357 | opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; | 357 | opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; |
358 | opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; | 358 | opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; |
359 | opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; | 359 | opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; |
360 | opt->monc_ping_timeout = CEPH_MONC_PING_TIMEOUT_DEFAULT; | ||
360 | 361 | ||
361 | /* get mon ip(s) */ | 362 | /* get mon ip(s) */ |
362 | /* ip1[:port1][,ip2[:port2]...] */ | 363 | /* ip1[:port1][,ip2[:port2]...] */ |
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 790fe89d90c0..4440edcce0d6 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c | |||
@@ -79,10 +79,6 @@ int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *inkey) | |||
79 | return 0; | 79 | return 0; |
80 | } | 80 | } |
81 | 81 | ||
82 | |||
83 | |||
84 | #define AES_KEY_SIZE 16 | ||
85 | |||
86 | static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void) | 82 | static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void) |
87 | { | 83 | { |
88 | return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); | 84 | return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); |
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index e3be1d22a247..525f454f7531 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c | |||
@@ -163,6 +163,7 @@ static struct kmem_cache *ceph_msg_data_cache; | |||
163 | static char tag_msg = CEPH_MSGR_TAG_MSG; | 163 | static char tag_msg = CEPH_MSGR_TAG_MSG; |
164 | static char tag_ack = CEPH_MSGR_TAG_ACK; | 164 | static char tag_ack = CEPH_MSGR_TAG_ACK; |
165 | static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; | 165 | static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; |
166 | static char tag_keepalive2 = CEPH_MSGR_TAG_KEEPALIVE2; | ||
166 | 167 | ||
167 | #ifdef CONFIG_LOCKDEP | 168 | #ifdef CONFIG_LOCKDEP |
168 | static struct lock_class_key socket_class; | 169 | static struct lock_class_key socket_class; |
@@ -176,7 +177,7 @@ static struct lock_class_key socket_class; | |||
176 | 177 | ||
177 | static void queue_con(struct ceph_connection *con); | 178 | static void queue_con(struct ceph_connection *con); |
178 | static void cancel_con(struct ceph_connection *con); | 179 | static void cancel_con(struct ceph_connection *con); |
179 | static void con_work(struct work_struct *); | 180 | static void ceph_con_workfn(struct work_struct *); |
180 | static void con_fault(struct ceph_connection *con); | 181 | static void con_fault(struct ceph_connection *con); |
181 | 182 | ||
182 | /* | 183 | /* |
@@ -276,22 +277,22 @@ static void _ceph_msgr_exit(void) | |||
276 | ceph_msgr_wq = NULL; | 277 | ceph_msgr_wq = NULL; |
277 | } | 278 | } |
278 | 279 | ||
279 | ceph_msgr_slab_exit(); | ||
280 | |||
281 | BUG_ON(zero_page == NULL); | 280 | BUG_ON(zero_page == NULL); |
282 | page_cache_release(zero_page); | 281 | page_cache_release(zero_page); |
283 | zero_page = NULL; | 282 | zero_page = NULL; |
283 | |||
284 | ceph_msgr_slab_exit(); | ||
284 | } | 285 | } |
285 | 286 | ||
286 | int ceph_msgr_init(void) | 287 | int ceph_msgr_init(void) |
287 | { | 288 | { |
289 | if (ceph_msgr_slab_init()) | ||
290 | return -ENOMEM; | ||
291 | |||
288 | BUG_ON(zero_page != NULL); | 292 | BUG_ON(zero_page != NULL); |
289 | zero_page = ZERO_PAGE(0); | 293 | zero_page = ZERO_PAGE(0); |
290 | page_cache_get(zero_page); | 294 | page_cache_get(zero_page); |
291 | 295 | ||
292 | if (ceph_msgr_slab_init()) | ||
293 | return -ENOMEM; | ||
294 | |||
295 | /* | 296 | /* |
296 | * The number of active work items is limited by the number of | 297 | * The number of active work items is limited by the number of |
297 | * connections, so leave @max_active at default. | 298 | * connections, so leave @max_active at default. |
@@ -749,7 +750,7 @@ void ceph_con_init(struct ceph_connection *con, void *private, | |||
749 | mutex_init(&con->mutex); | 750 | mutex_init(&con->mutex); |
750 | INIT_LIST_HEAD(&con->out_queue); | 751 | INIT_LIST_HEAD(&con->out_queue); |
751 | INIT_LIST_HEAD(&con->out_sent); | 752 | INIT_LIST_HEAD(&con->out_sent); |
752 | INIT_DELAYED_WORK(&con->work, con_work); | 753 | INIT_DELAYED_WORK(&con->work, ceph_con_workfn); |
753 | 754 | ||
754 | con->state = CON_STATE_CLOSED; | 755 | con->state = CON_STATE_CLOSED; |
755 | } | 756 | } |
@@ -1351,7 +1352,15 @@ static void prepare_write_keepalive(struct ceph_connection *con) | |||
1351 | { | 1352 | { |
1352 | dout("prepare_write_keepalive %p\n", con); | 1353 | dout("prepare_write_keepalive %p\n", con); |
1353 | con_out_kvec_reset(con); | 1354 | con_out_kvec_reset(con); |
1354 | con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive); | 1355 | if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) { |
1356 | struct timespec ts = CURRENT_TIME; | ||
1357 | struct ceph_timespec ceph_ts; | ||
1358 | ceph_encode_timespec(&ceph_ts, &ts); | ||
1359 | con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2); | ||
1360 | con_out_kvec_add(con, sizeof(ceph_ts), &ceph_ts); | ||
1361 | } else { | ||
1362 | con_out_kvec_add(con, sizeof(tag_keepalive), &tag_keepalive); | ||
1363 | } | ||
1355 | con_flag_set(con, CON_FLAG_WRITE_PENDING); | 1364 | con_flag_set(con, CON_FLAG_WRITE_PENDING); |
1356 | } | 1365 | } |
1357 | 1366 | ||
@@ -1625,6 +1634,12 @@ static void prepare_read_tag(struct ceph_connection *con) | |||
1625 | con->in_tag = CEPH_MSGR_TAG_READY; | 1634 | con->in_tag = CEPH_MSGR_TAG_READY; |
1626 | } | 1635 | } |
1627 | 1636 | ||
1637 | static void prepare_read_keepalive_ack(struct ceph_connection *con) | ||
1638 | { | ||
1639 | dout("prepare_read_keepalive_ack %p\n", con); | ||
1640 | con->in_base_pos = 0; | ||
1641 | } | ||
1642 | |||
1628 | /* | 1643 | /* |
1629 | * Prepare to read a message. | 1644 | * Prepare to read a message. |
1630 | */ | 1645 | */ |
@@ -2322,13 +2337,6 @@ static int read_partial_message(struct ceph_connection *con) | |||
2322 | return ret; | 2337 | return ret; |
2323 | 2338 | ||
2324 | BUG_ON(!con->in_msg ^ skip); | 2339 | BUG_ON(!con->in_msg ^ skip); |
2325 | if (con->in_msg && data_len > con->in_msg->data_length) { | ||
2326 | pr_warn("%s skipping long message (%u > %zd)\n", | ||
2327 | __func__, data_len, con->in_msg->data_length); | ||
2328 | ceph_msg_put(con->in_msg); | ||
2329 | con->in_msg = NULL; | ||
2330 | skip = 1; | ||
2331 | } | ||
2332 | if (skip) { | 2340 | if (skip) { |
2333 | /* skip this message */ | 2341 | /* skip this message */ |
2334 | dout("alloc_msg said skip message\n"); | 2342 | dout("alloc_msg said skip message\n"); |
@@ -2457,6 +2465,17 @@ static void process_message(struct ceph_connection *con) | |||
2457 | mutex_lock(&con->mutex); | 2465 | mutex_lock(&con->mutex); |
2458 | } | 2466 | } |
2459 | 2467 | ||
2468 | static int read_keepalive_ack(struct ceph_connection *con) | ||
2469 | { | ||
2470 | struct ceph_timespec ceph_ts; | ||
2471 | size_t size = sizeof(ceph_ts); | ||
2472 | int ret = read_partial(con, size, size, &ceph_ts); | ||
2473 | if (ret <= 0) | ||
2474 | return ret; | ||
2475 | ceph_decode_timespec(&con->last_keepalive_ack, &ceph_ts); | ||
2476 | prepare_read_tag(con); | ||
2477 | return 1; | ||
2478 | } | ||
2460 | 2479 | ||
2461 | /* | 2480 | /* |
2462 | * Write something to the socket. Called in a worker thread when the | 2481 | * Write something to the socket. Called in a worker thread when the |
@@ -2526,6 +2545,10 @@ more_kvec: | |||
2526 | 2545 | ||
2527 | do_next: | 2546 | do_next: |
2528 | if (con->state == CON_STATE_OPEN) { | 2547 | if (con->state == CON_STATE_OPEN) { |
2548 | if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) { | ||
2549 | prepare_write_keepalive(con); | ||
2550 | goto more; | ||
2551 | } | ||
2529 | /* is anything else pending? */ | 2552 | /* is anything else pending? */ |
2530 | if (!list_empty(&con->out_queue)) { | 2553 | if (!list_empty(&con->out_queue)) { |
2531 | prepare_write_message(con); | 2554 | prepare_write_message(con); |
@@ -2535,10 +2558,6 @@ do_next: | |||
2535 | prepare_write_ack(con); | 2558 | prepare_write_ack(con); |
2536 | goto more; | 2559 | goto more; |
2537 | } | 2560 | } |
2538 | if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) { | ||
2539 | prepare_write_keepalive(con); | ||
2540 | goto more; | ||
2541 | } | ||
2542 | } | 2561 | } |
2543 | 2562 | ||
2544 | /* Nothing to do! */ | 2563 | /* Nothing to do! */ |
@@ -2641,6 +2660,9 @@ more: | |||
2641 | case CEPH_MSGR_TAG_ACK: | 2660 | case CEPH_MSGR_TAG_ACK: |
2642 | prepare_read_ack(con); | 2661 | prepare_read_ack(con); |
2643 | break; | 2662 | break; |
2663 | case CEPH_MSGR_TAG_KEEPALIVE2_ACK: | ||
2664 | prepare_read_keepalive_ack(con); | ||
2665 | break; | ||
2644 | case CEPH_MSGR_TAG_CLOSE: | 2666 | case CEPH_MSGR_TAG_CLOSE: |
2645 | con_close_socket(con); | 2667 | con_close_socket(con); |
2646 | con->state = CON_STATE_CLOSED; | 2668 | con->state = CON_STATE_CLOSED; |
@@ -2684,6 +2706,12 @@ more: | |||
2684 | process_ack(con); | 2706 | process_ack(con); |
2685 | goto more; | 2707 | goto more; |
2686 | } | 2708 | } |
2709 | if (con->in_tag == CEPH_MSGR_TAG_KEEPALIVE2_ACK) { | ||
2710 | ret = read_keepalive_ack(con); | ||
2711 | if (ret <= 0) | ||
2712 | goto out; | ||
2713 | goto more; | ||
2714 | } | ||
2687 | 2715 | ||
2688 | out: | 2716 | out: |
2689 | dout("try_read done on %p ret %d\n", con, ret); | 2717 | dout("try_read done on %p ret %d\n", con, ret); |
@@ -2799,7 +2827,7 @@ static void con_fault_finish(struct ceph_connection *con) | |||
2799 | /* | 2827 | /* |
2800 | * Do some work on a connection. Drop a connection ref when we're done. | 2828 | * Do some work on a connection. Drop a connection ref when we're done. |
2801 | */ | 2829 | */ |
2802 | static void con_work(struct work_struct *work) | 2830 | static void ceph_con_workfn(struct work_struct *work) |
2803 | { | 2831 | { |
2804 | struct ceph_connection *con = container_of(work, struct ceph_connection, | 2832 | struct ceph_connection *con = container_of(work, struct ceph_connection, |
2805 | work.work); | 2833 | work.work); |
@@ -3101,6 +3129,20 @@ void ceph_con_keepalive(struct ceph_connection *con) | |||
3101 | } | 3129 | } |
3102 | EXPORT_SYMBOL(ceph_con_keepalive); | 3130 | EXPORT_SYMBOL(ceph_con_keepalive); |
3103 | 3131 | ||
3132 | bool ceph_con_keepalive_expired(struct ceph_connection *con, | ||
3133 | unsigned long interval) | ||
3134 | { | ||
3135 | if (interval > 0 && | ||
3136 | (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2)) { | ||
3137 | struct timespec now = CURRENT_TIME; | ||
3138 | struct timespec ts; | ||
3139 | jiffies_to_timespec(interval, &ts); | ||
3140 | ts = timespec_add(con->last_keepalive_ack, ts); | ||
3141 | return timespec_compare(&now, &ts) >= 0; | ||
3142 | } | ||
3143 | return false; | ||
3144 | } | ||
3145 | |||
3104 | static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type) | 3146 | static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type) |
3105 | { | 3147 | { |
3106 | struct ceph_msg_data *data; | 3148 | struct ceph_msg_data *data; |
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 9d6ff1215928..edda01626a45 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c | |||
@@ -149,6 +149,10 @@ static int __open_session(struct ceph_mon_client *monc) | |||
149 | CEPH_ENTITY_TYPE_MON, monc->cur_mon, | 149 | CEPH_ENTITY_TYPE_MON, monc->cur_mon, |
150 | &monc->monmap->mon_inst[monc->cur_mon].addr); | 150 | &monc->monmap->mon_inst[monc->cur_mon].addr); |
151 | 151 | ||
152 | /* send an initial keepalive to ensure our timestamp is | ||
153 | * valid by the time we are in an OPENED state */ | ||
154 | ceph_con_keepalive(&monc->con); | ||
155 | |||
152 | /* initiatiate authentication handshake */ | 156 | /* initiatiate authentication handshake */ |
153 | ret = ceph_auth_build_hello(monc->auth, | 157 | ret = ceph_auth_build_hello(monc->auth, |
154 | monc->m_auth->front.iov_base, | 158 | monc->m_auth->front.iov_base, |
@@ -170,14 +174,19 @@ static bool __sub_expired(struct ceph_mon_client *monc) | |||
170 | */ | 174 | */ |
171 | static void __schedule_delayed(struct ceph_mon_client *monc) | 175 | static void __schedule_delayed(struct ceph_mon_client *monc) |
172 | { | 176 | { |
173 | unsigned int delay; | 177 | struct ceph_options *opt = monc->client->options; |
178 | unsigned long delay; | ||
174 | 179 | ||
175 | if (monc->cur_mon < 0 || __sub_expired(monc)) | 180 | if (monc->cur_mon < 0 || __sub_expired(monc)) { |
176 | delay = 10 * HZ; | 181 | delay = 10 * HZ; |
177 | else | 182 | } else { |
178 | delay = 20 * HZ; | 183 | delay = 20 * HZ; |
179 | dout("__schedule_delayed after %u\n", delay); | 184 | if (opt->monc_ping_timeout > 0) |
180 | schedule_delayed_work(&monc->delayed_work, delay); | 185 | delay = min(delay, opt->monc_ping_timeout / 3); |
186 | } | ||
187 | dout("__schedule_delayed after %lu\n", delay); | ||
188 | schedule_delayed_work(&monc->delayed_work, | ||
189 | round_jiffies_relative(delay)); | ||
181 | } | 190 | } |
182 | 191 | ||
183 | /* | 192 | /* |
@@ -743,11 +752,23 @@ static void delayed_work(struct work_struct *work) | |||
743 | __close_session(monc); | 752 | __close_session(monc); |
744 | __open_session(monc); /* continue hunting */ | 753 | __open_session(monc); /* continue hunting */ |
745 | } else { | 754 | } else { |
746 | ceph_con_keepalive(&monc->con); | 755 | struct ceph_options *opt = monc->client->options; |
756 | int is_auth = ceph_auth_is_authenticated(monc->auth); | ||
757 | if (ceph_con_keepalive_expired(&monc->con, | ||
758 | opt->monc_ping_timeout)) { | ||
759 | dout("monc keepalive timeout\n"); | ||
760 | is_auth = 0; | ||
761 | __close_session(monc); | ||
762 | monc->hunting = true; | ||
763 | __open_session(monc); | ||
764 | } | ||
747 | 765 | ||
748 | __validate_auth(monc); | 766 | if (!monc->hunting) { |
767 | ceph_con_keepalive(&monc->con); | ||
768 | __validate_auth(monc); | ||
769 | } | ||
749 | 770 | ||
750 | if (ceph_auth_is_authenticated(monc->auth)) | 771 | if (is_auth) |
751 | __send_subscribe(monc); | 772 | __send_subscribe(monc); |
752 | } | 773 | } |
753 | __schedule_delayed(monc); | 774 | __schedule_delayed(monc); |
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 50033677c0fa..80b94e37c94a 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c | |||
@@ -2817,8 +2817,9 @@ out: | |||
2817 | } | 2817 | } |
2818 | 2818 | ||
2819 | /* | 2819 | /* |
2820 | * lookup and return message for incoming reply. set up reply message | 2820 | * Lookup and return message for incoming reply. Don't try to do |
2821 | * pages. | 2821 | * anything about a larger than preallocated data portion of the |
2822 | * message at the moment - for now, just skip the message. | ||
2822 | */ | 2823 | */ |
2823 | static struct ceph_msg *get_reply(struct ceph_connection *con, | 2824 | static struct ceph_msg *get_reply(struct ceph_connection *con, |
2824 | struct ceph_msg_header *hdr, | 2825 | struct ceph_msg_header *hdr, |
@@ -2836,10 +2837,10 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
2836 | mutex_lock(&osdc->request_mutex); | 2837 | mutex_lock(&osdc->request_mutex); |
2837 | req = __lookup_request(osdc, tid); | 2838 | req = __lookup_request(osdc, tid); |
2838 | if (!req) { | 2839 | if (!req) { |
2839 | *skip = 1; | 2840 | pr_warn("%s osd%d tid %llu unknown, skipping\n", |
2841 | __func__, osd->o_osd, tid); | ||
2840 | m = NULL; | 2842 | m = NULL; |
2841 | dout("get_reply unknown tid %llu from osd%d\n", tid, | 2843 | *skip = 1; |
2842 | osd->o_osd); | ||
2843 | goto out; | 2844 | goto out; |
2844 | } | 2845 | } |
2845 | 2846 | ||
@@ -2849,10 +2850,9 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
2849 | ceph_msg_revoke_incoming(req->r_reply); | 2850 | ceph_msg_revoke_incoming(req->r_reply); |
2850 | 2851 | ||
2851 | if (front_len > req->r_reply->front_alloc_len) { | 2852 | if (front_len > req->r_reply->front_alloc_len) { |
2852 | pr_warn("get_reply front %d > preallocated %d (%u#%llu)\n", | 2853 | pr_warn("%s osd%d tid %llu front %d > preallocated %d\n", |
2853 | front_len, req->r_reply->front_alloc_len, | 2854 | __func__, osd->o_osd, req->r_tid, front_len, |
2854 | (unsigned int)con->peer_name.type, | 2855 | req->r_reply->front_alloc_len); |
2855 | le64_to_cpu(con->peer_name.num)); | ||
2856 | m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front_len, GFP_NOFS, | 2856 | m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front_len, GFP_NOFS, |
2857 | false); | 2857 | false); |
2858 | if (!m) | 2858 | if (!m) |
@@ -2860,37 +2860,22 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
2860 | ceph_msg_put(req->r_reply); | 2860 | ceph_msg_put(req->r_reply); |
2861 | req->r_reply = m; | 2861 | req->r_reply = m; |
2862 | } | 2862 | } |
2863 | m = ceph_msg_get(req->r_reply); | ||
2864 | |||
2865 | if (data_len > 0) { | ||
2866 | struct ceph_osd_data *osd_data; | ||
2867 | 2863 | ||
2868 | /* | 2864 | if (data_len > req->r_reply->data_length) { |
2869 | * XXX This is assuming there is only one op containing | 2865 | pr_warn("%s osd%d tid %llu data %d > preallocated %zu, skipping\n", |
2870 | * XXX page data. Probably OK for reads, but this | 2866 | __func__, osd->o_osd, req->r_tid, data_len, |
2871 | * XXX ought to be done more generally. | 2867 | req->r_reply->data_length); |
2872 | */ | 2868 | m = NULL; |
2873 | osd_data = osd_req_op_extent_osd_data(req, 0); | 2869 | *skip = 1; |
2874 | if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) { | 2870 | goto out; |
2875 | if (osd_data->pages && | ||
2876 | unlikely(osd_data->length < data_len)) { | ||
2877 | |||
2878 | pr_warn("tid %lld reply has %d bytes we had only %llu bytes ready\n", | ||
2879 | tid, data_len, osd_data->length); | ||
2880 | *skip = 1; | ||
2881 | ceph_msg_put(m); | ||
2882 | m = NULL; | ||
2883 | goto out; | ||
2884 | } | ||
2885 | } | ||
2886 | } | 2871 | } |
2887 | *skip = 0; | 2872 | |
2873 | m = ceph_msg_get(req->r_reply); | ||
2888 | dout("get_reply tid %lld %p\n", tid, m); | 2874 | dout("get_reply tid %lld %p\n", tid, m); |
2889 | 2875 | ||
2890 | out: | 2876 | out: |
2891 | mutex_unlock(&osdc->request_mutex); | 2877 | mutex_unlock(&osdc->request_mutex); |
2892 | return m; | 2878 | return m; |
2893 | |||
2894 | } | 2879 | } |
2895 | 2880 | ||
2896 | static struct ceph_msg *alloc_msg(struct ceph_connection *con, | 2881 | static struct ceph_msg *alloc_msg(struct ceph_connection *con, |
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 4a3125836b64..7d8f581d9f1f 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c | |||
@@ -1300,7 +1300,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
1300 | ceph_decode_addr(&addr); | 1300 | ceph_decode_addr(&addr); |
1301 | pr_info("osd%d up\n", osd); | 1301 | pr_info("osd%d up\n", osd); |
1302 | BUG_ON(osd >= map->max_osd); | 1302 | BUG_ON(osd >= map->max_osd); |
1303 | map->osd_state[osd] |= CEPH_OSD_UP; | 1303 | map->osd_state[osd] |= CEPH_OSD_UP | CEPH_OSD_EXISTS; |
1304 | map->osd_addr[osd] = addr; | 1304 | map->osd_addr[osd] = addr; |
1305 | } | 1305 | } |
1306 | 1306 | ||