diff options
author | Ilya Dryomov <idryomov@gmail.com> | 2015-09-02 04:37:09 -0400 |
---|---|---|
committer | Ilya Dryomov <idryomov@gmail.com> | 2015-09-09 02:52:17 -0400 |
commit | d15f9d694b77fe5e4ea12b3031ecaa13b5aa2b10 (patch) | |
tree | c5341ec8d2b91adaa17d4ef3177a0b04ef9581a7 /net | |
parent | 8b9558aab853e98ba6e3fee0dd8545544966958c (diff) |
libceph: check data_len in ->alloc_msg()
Only ->alloc_msg() should check data_len of the incoming message
against the preallocated ceph_msg, doing it in the messenger is not
right. The contract is that either ->alloc_msg() returns a ceph_msg
which will fit all of the portions of the incoming message, or it
returns NULL and possibly sets skip, signaling whether NULL is due to
an -ENOMEM. ->alloc_msg() should be the only place where we make the
skip/no-skip decision.
I stumbled upon this while looking at con/osd ref counting. Right now,
if we get a non-extent message with a larger data portion than we are
prepared for, ->alloc_msg() returns a ceph_msg, and then, when we skip
it in the messenger, we don't put the con/osd ref acquired in
ceph_con_in_msg_alloc() (which is normally put in process_message()),
so this also fixes a memory leak.
An existing BUG_ON in ceph_msg_data_cursor_init() ensures we don't
corrupt random memory should a buggy ->alloc_msg() return an unfit
ceph_msg.
While at it, I changed the "unknown tid" dout() to a pr_warn() to make
sure all skips are seen and unified format strings.
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Alex Elder <elder@linaro.org>
Diffstat (limited to 'net')
-rw-r--r-- | net/ceph/messenger.c | 7 | ||||
-rw-r--r-- | net/ceph/osd_client.c | 51 |
2 files changed, 18 insertions, 40 deletions
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 36757d46ac40..525f454f7531 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c | |||
@@ -2337,13 +2337,6 @@ static int read_partial_message(struct ceph_connection *con) | |||
2337 | return ret; | 2337 | return ret; |
2338 | 2338 | ||
2339 | BUG_ON(!con->in_msg ^ skip); | 2339 | BUG_ON(!con->in_msg ^ skip); |
2340 | if (con->in_msg && data_len > con->in_msg->data_length) { | ||
2341 | pr_warn("%s skipping long message (%u > %zd)\n", | ||
2342 | __func__, data_len, con->in_msg->data_length); | ||
2343 | ceph_msg_put(con->in_msg); | ||
2344 | con->in_msg = NULL; | ||
2345 | skip = 1; | ||
2346 | } | ||
2347 | if (skip) { | 2340 | if (skip) { |
2348 | /* skip this message */ | 2341 | /* skip this message */ |
2349 | dout("alloc_msg said skip message\n"); | 2342 | dout("alloc_msg said skip message\n"); |
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 50033677c0fa..80b94e37c94a 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c | |||
@@ -2817,8 +2817,9 @@ out: | |||
2817 | } | 2817 | } |
2818 | 2818 | ||
2819 | /* | 2819 | /* |
2820 | * lookup and return message for incoming reply. set up reply message | 2820 | * Lookup and return message for incoming reply. Don't try to do |
2821 | * pages. | 2821 | * anything about a larger than preallocated data portion of the |
2822 | * message at the moment - for now, just skip the message. | ||
2822 | */ | 2823 | */ |
2823 | static struct ceph_msg *get_reply(struct ceph_connection *con, | 2824 | static struct ceph_msg *get_reply(struct ceph_connection *con, |
2824 | struct ceph_msg_header *hdr, | 2825 | struct ceph_msg_header *hdr, |
@@ -2836,10 +2837,10 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
2836 | mutex_lock(&osdc->request_mutex); | 2837 | mutex_lock(&osdc->request_mutex); |
2837 | req = __lookup_request(osdc, tid); | 2838 | req = __lookup_request(osdc, tid); |
2838 | if (!req) { | 2839 | if (!req) { |
2839 | *skip = 1; | 2840 | pr_warn("%s osd%d tid %llu unknown, skipping\n", |
2841 | __func__, osd->o_osd, tid); | ||
2840 | m = NULL; | 2842 | m = NULL; |
2841 | dout("get_reply unknown tid %llu from osd%d\n", tid, | 2843 | *skip = 1; |
2842 | osd->o_osd); | ||
2843 | goto out; | 2844 | goto out; |
2844 | } | 2845 | } |
2845 | 2846 | ||
@@ -2849,10 +2850,9 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
2849 | ceph_msg_revoke_incoming(req->r_reply); | 2850 | ceph_msg_revoke_incoming(req->r_reply); |
2850 | 2851 | ||
2851 | if (front_len > req->r_reply->front_alloc_len) { | 2852 | if (front_len > req->r_reply->front_alloc_len) { |
2852 | pr_warn("get_reply front %d > preallocated %d (%u#%llu)\n", | 2853 | pr_warn("%s osd%d tid %llu front %d > preallocated %d\n", |
2853 | front_len, req->r_reply->front_alloc_len, | 2854 | __func__, osd->o_osd, req->r_tid, front_len, |
2854 | (unsigned int)con->peer_name.type, | 2855 | req->r_reply->front_alloc_len); |
2855 | le64_to_cpu(con->peer_name.num)); | ||
2856 | m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front_len, GFP_NOFS, | 2856 | m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front_len, GFP_NOFS, |
2857 | false); | 2857 | false); |
2858 | if (!m) | 2858 | if (!m) |
@@ -2860,37 +2860,22 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
2860 | ceph_msg_put(req->r_reply); | 2860 | ceph_msg_put(req->r_reply); |
2861 | req->r_reply = m; | 2861 | req->r_reply = m; |
2862 | } | 2862 | } |
2863 | m = ceph_msg_get(req->r_reply); | ||
2864 | |||
2865 | if (data_len > 0) { | ||
2866 | struct ceph_osd_data *osd_data; | ||
2867 | 2863 | ||
2868 | /* | 2864 | if (data_len > req->r_reply->data_length) { |
2869 | * XXX This is assuming there is only one op containing | 2865 | pr_warn("%s osd%d tid %llu data %d > preallocated %zu, skipping\n", |
2870 | * XXX page data. Probably OK for reads, but this | 2866 | __func__, osd->o_osd, req->r_tid, data_len, |
2871 | * XXX ought to be done more generally. | 2867 | req->r_reply->data_length); |
2872 | */ | 2868 | m = NULL; |
2873 | osd_data = osd_req_op_extent_osd_data(req, 0); | 2869 | *skip = 1; |
2874 | if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) { | 2870 | goto out; |
2875 | if (osd_data->pages && | ||
2876 | unlikely(osd_data->length < data_len)) { | ||
2877 | |||
2878 | pr_warn("tid %lld reply has %d bytes we had only %llu bytes ready\n", | ||
2879 | tid, data_len, osd_data->length); | ||
2880 | *skip = 1; | ||
2881 | ceph_msg_put(m); | ||
2882 | m = NULL; | ||
2883 | goto out; | ||
2884 | } | ||
2885 | } | ||
2886 | } | 2871 | } |
2887 | *skip = 0; | 2872 | |
2873 | m = ceph_msg_get(req->r_reply); | ||
2888 | dout("get_reply tid %lld %p\n", tid, m); | 2874 | dout("get_reply tid %lld %p\n", tid, m); |
2889 | 2875 | ||
2890 | out: | 2876 | out: |
2891 | mutex_unlock(&osdc->request_mutex); | 2877 | mutex_unlock(&osdc->request_mutex); |
2892 | return m; | 2878 | return m; |
2893 | |||
2894 | } | 2879 | } |
2895 | 2880 | ||
2896 | static struct ceph_msg *alloc_msg(struct ceph_connection *con, | 2881 | static struct ceph_msg *alloc_msg(struct ceph_connection *con, |