summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorIlya Dryomov <idryomov@gmail.com>2015-09-02 04:37:09 -0400
committerIlya Dryomov <idryomov@gmail.com>2015-09-09 02:52:17 -0400
commitd15f9d694b77fe5e4ea12b3031ecaa13b5aa2b10 (patch)
treec5341ec8d2b91adaa17d4ef3177a0b04ef9581a7 /net
parent8b9558aab853e98ba6e3fee0dd8545544966958c (diff)
libceph: check data_len in ->alloc_msg()
Only ->alloc_msg() should check data_len of the incoming message against the preallocated ceph_msg, doing it in the messenger is not right. The contract is that either ->alloc_msg() returns a ceph_msg which will fit all of the portions of the incoming message, or it returns NULL and possibly sets skip, signaling whether NULL is due to an -ENOMEM. ->alloc_msg() should be the only place where we make the skip/no-skip decision. I stumbled upon this while looking at con/osd ref counting. Right now, if we get a non-extent message with a larger data portion than we are prepared for, ->alloc_msg() returns a ceph_msg, and then, when we skip it in the messenger, we don't put the con/osd ref acquired in ceph_con_in_msg_alloc() (which is normally put in process_message()), so this also fixes a memory leak. An existing BUG_ON in ceph_msg_data_cursor_init() ensures we don't corrupt random memory should a buggy ->alloc_msg() return an unfit ceph_msg. While at it, I changed the "unknown tid" dout() to a pr_warn() to make sure all skips are seen and unified format strings. Signed-off-by: Ilya Dryomov <idryomov@gmail.com> Reviewed-by: Alex Elder <elder@linaro.org>
Diffstat (limited to 'net')
-rw-r--r--net/ceph/messenger.c7
-rw-r--r--net/ceph/osd_client.c51
2 files changed, 18 insertions, 40 deletions
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 36757d46ac40..525f454f7531 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -2337,13 +2337,6 @@ static int read_partial_message(struct ceph_connection *con)
2337 return ret; 2337 return ret;
2338 2338
2339 BUG_ON(!con->in_msg ^ skip); 2339 BUG_ON(!con->in_msg ^ skip);
2340 if (con->in_msg && data_len > con->in_msg->data_length) {
2341 pr_warn("%s skipping long message (%u > %zd)\n",
2342 __func__, data_len, con->in_msg->data_length);
2343 ceph_msg_put(con->in_msg);
2344 con->in_msg = NULL;
2345 skip = 1;
2346 }
2347 if (skip) { 2340 if (skip) {
2348 /* skip this message */ 2341 /* skip this message */
2349 dout("alloc_msg said skip message\n"); 2342 dout("alloc_msg said skip message\n");
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 50033677c0fa..80b94e37c94a 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -2817,8 +2817,9 @@ out:
2817} 2817}
2818 2818
2819/* 2819/*
2820 * lookup and return message for incoming reply. set up reply message 2820 * Lookup and return message for incoming reply. Don't try to do
2821 * pages. 2821 * anything about a larger than preallocated data portion of the
2822 * message at the moment - for now, just skip the message.
2822 */ 2823 */
2823static struct ceph_msg *get_reply(struct ceph_connection *con, 2824static struct ceph_msg *get_reply(struct ceph_connection *con,
2824 struct ceph_msg_header *hdr, 2825 struct ceph_msg_header *hdr,
@@ -2836,10 +2837,10 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
2836 mutex_lock(&osdc->request_mutex); 2837 mutex_lock(&osdc->request_mutex);
2837 req = __lookup_request(osdc, tid); 2838 req = __lookup_request(osdc, tid);
2838 if (!req) { 2839 if (!req) {
2839 *skip = 1; 2840 pr_warn("%s osd%d tid %llu unknown, skipping\n",
2841 __func__, osd->o_osd, tid);
2840 m = NULL; 2842 m = NULL;
2841 dout("get_reply unknown tid %llu from osd%d\n", tid, 2843 *skip = 1;
2842 osd->o_osd);
2843 goto out; 2844 goto out;
2844 } 2845 }
2845 2846
@@ -2849,10 +2850,9 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
2849 ceph_msg_revoke_incoming(req->r_reply); 2850 ceph_msg_revoke_incoming(req->r_reply);
2850 2851
2851 if (front_len > req->r_reply->front_alloc_len) { 2852 if (front_len > req->r_reply->front_alloc_len) {
2852 pr_warn("get_reply front %d > preallocated %d (%u#%llu)\n", 2853 pr_warn("%s osd%d tid %llu front %d > preallocated %d\n",
2853 front_len, req->r_reply->front_alloc_len, 2854 __func__, osd->o_osd, req->r_tid, front_len,
2854 (unsigned int)con->peer_name.type, 2855 req->r_reply->front_alloc_len);
2855 le64_to_cpu(con->peer_name.num));
2856 m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front_len, GFP_NOFS, 2856 m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front_len, GFP_NOFS,
2857 false); 2857 false);
2858 if (!m) 2858 if (!m)
@@ -2860,37 +2860,22 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
2860 ceph_msg_put(req->r_reply); 2860 ceph_msg_put(req->r_reply);
2861 req->r_reply = m; 2861 req->r_reply = m;
2862 } 2862 }
2863 m = ceph_msg_get(req->r_reply);
2864
2865 if (data_len > 0) {
2866 struct ceph_osd_data *osd_data;
2867 2863
2868 /* 2864 if (data_len > req->r_reply->data_length) {
2869 * XXX This is assuming there is only one op containing 2865 pr_warn("%s osd%d tid %llu data %d > preallocated %zu, skipping\n",
2870 * XXX page data. Probably OK for reads, but this 2866 __func__, osd->o_osd, req->r_tid, data_len,
2871 * XXX ought to be done more generally. 2867 req->r_reply->data_length);
2872 */ 2868 m = NULL;
2873 osd_data = osd_req_op_extent_osd_data(req, 0); 2869 *skip = 1;
2874 if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) { 2870 goto out;
2875 if (osd_data->pages &&
2876 unlikely(osd_data->length < data_len)) {
2877
2878 pr_warn("tid %lld reply has %d bytes we had only %llu bytes ready\n",
2879 tid, data_len, osd_data->length);
2880 *skip = 1;
2881 ceph_msg_put(m);
2882 m = NULL;
2883 goto out;
2884 }
2885 }
2886 } 2871 }
2887 *skip = 0; 2872
2873 m = ceph_msg_get(req->r_reply);
2888 dout("get_reply tid %lld %p\n", tid, m); 2874 dout("get_reply tid %lld %p\n", tid, m);
2889 2875
2890out: 2876out:
2891 mutex_unlock(&osdc->request_mutex); 2877 mutex_unlock(&osdc->request_mutex);
2892 return m; 2878 return m;
2893
2894} 2879}
2895 2880
2896static struct ceph_msg *alloc_msg(struct ceph_connection *con, 2881static struct ceph_msg *alloc_msg(struct ceph_connection *con,