about | summary | refs | log | tree | commit | diff | stats
path: root/net/ceph
diff options
context:
space:
mode:
Diffstat (limited to 'net/ceph')
-rw-r--r-- net/ceph/auth_x.c 256
-rw-r--r-- net/ceph/messenger.c 47
-rw-r--r-- net/ceph/mon_client.c 8
-rw-r--r-- net/ceph/osd_client.c 129
4 files changed, 264 insertions, 176 deletions
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 96238ba95f2b..de6662b14e1f 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -13,8 +13,6 @@
13#include "auth_x.h" 13#include "auth_x.h"
14#include "auth_x_protocol.h" 14#include "auth_x_protocol.h"
15 15
16#define TEMP_TICKET_BUF_LEN 256
17
18static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed); 16static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed);
19 17
20static int ceph_x_is_authenticated(struct ceph_auth_client *ac) 18static int ceph_x_is_authenticated(struct ceph_auth_client *ac)
@@ -64,7 +62,7 @@ static int ceph_x_encrypt(struct ceph_crypto_key *secret,
64} 62}
65 63
66static int ceph_x_decrypt(struct ceph_crypto_key *secret, 64static int ceph_x_decrypt(struct ceph_crypto_key *secret,
67 void **p, void *end, void *obuf, size_t olen) 65 void **p, void *end, void **obuf, size_t olen)
68{ 66{
69 struct ceph_x_encrypt_header head; 67 struct ceph_x_encrypt_header head;
70 size_t head_len = sizeof(head); 68 size_t head_len = sizeof(head);
@@ -75,8 +73,14 @@ static int ceph_x_decrypt(struct ceph_crypto_key *secret,
75 return -EINVAL; 73 return -EINVAL;
76 74
77 dout("ceph_x_decrypt len %d\n", len); 75 dout("ceph_x_decrypt len %d\n", len);
78 ret = ceph_decrypt2(secret, &head, &head_len, obuf, &olen, 76 if (*obuf == NULL) {
79 *p, len); 77 *obuf = kmalloc(len, GFP_NOFS);
78 if (!*obuf)
79 return -ENOMEM;
80 olen = len;
81 }
82
83 ret = ceph_decrypt2(secret, &head, &head_len, *obuf, &olen, *p, len);
80 if (ret) 84 if (ret)
81 return ret; 85 return ret;
82 if (head.struct_v != 1 || le64_to_cpu(head.magic) != CEPHX_ENC_MAGIC) 86 if (head.struct_v != 1 || le64_to_cpu(head.magic) != CEPHX_ENC_MAGIC)
@@ -129,139 +133,120 @@ static void remove_ticket_handler(struct ceph_auth_client *ac,
129 kfree(th); 133 kfree(th);
130} 134}
131 135
132static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, 136static int process_one_ticket(struct ceph_auth_client *ac,
133 struct ceph_crypto_key *secret, 137 struct ceph_crypto_key *secret,
134 void *buf, void *end) 138 void **p, void *end)
135{ 139{
136 struct ceph_x_info *xi = ac->private; 140 struct ceph_x_info *xi = ac->private;
137 int num; 141 int type;
138 void *p = buf; 142 u8 tkt_struct_v, blob_struct_v;
143 struct ceph_x_ticket_handler *th;
144 void *dbuf = NULL;
145 void *dp, *dend;
146 int dlen;
147 char is_enc;
148 struct timespec validity;
149 struct ceph_crypto_key old_key;
150 void *ticket_buf = NULL;
151 void *tp, *tpend;
152 struct ceph_timespec new_validity;
153 struct ceph_crypto_key new_session_key;
154 struct ceph_buffer *new_ticket_blob;
155 unsigned long new_expires, new_renew_after;
156 u64 new_secret_id;
139 int ret; 157 int ret;
140 char *dbuf;
141 char *ticket_buf;
142 u8 reply_struct_v;
143 158
144 dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS); 159 ceph_decode_need(p, end, sizeof(u32) + 1, bad);
145 if (!dbuf)
146 return -ENOMEM;
147 160
148 ret = -ENOMEM; 161 type = ceph_decode_32(p);
149 ticket_buf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS); 162 dout(" ticket type %d %s\n", type, ceph_entity_type_name(type));
150 if (!ticket_buf)
151 goto out_dbuf;
152 163
153 ceph_decode_need(&p, end, 1 + sizeof(u32), bad); 164 tkt_struct_v = ceph_decode_8(p);
154 reply_struct_v = ceph_decode_8(&p); 165 if (tkt_struct_v != 1)
155 if (reply_struct_v != 1)
156 goto bad; 166 goto bad;
157 num = ceph_decode_32(&p);
158 dout("%d tickets\n", num);
159 while (num--) {
160 int type;
161 u8 tkt_struct_v, blob_struct_v;
162 struct ceph_x_ticket_handler *th;
163 void *dp, *dend;
164 int dlen;
165 char is_enc;
166 struct timespec validity;
167 struct ceph_crypto_key old_key;
168 void *tp, *tpend;
169 struct ceph_timespec new_validity;
170 struct ceph_crypto_key new_session_key;
171 struct ceph_buffer *new_ticket_blob;
172 unsigned long new_expires, new_renew_after;
173 u64 new_secret_id;
174
175 ceph_decode_need(&p, end, sizeof(u32) + 1, bad);
176
177 type = ceph_decode_32(&p);
178 dout(" ticket type %d %s\n", type, ceph_entity_type_name(type));
179
180 tkt_struct_v = ceph_decode_8(&p);
181 if (tkt_struct_v != 1)
182 goto bad;
183
184 th = get_ticket_handler(ac, type);
185 if (IS_ERR(th)) {
186 ret = PTR_ERR(th);
187 goto out;
188 }
189 167
190 /* blob for me */ 168 th = get_ticket_handler(ac, type);
191 dlen = ceph_x_decrypt(secret, &p, end, dbuf, 169 if (IS_ERR(th)) {
192 TEMP_TICKET_BUF_LEN); 170 ret = PTR_ERR(th);
193 if (dlen <= 0) { 171 goto out;
194 ret = dlen; 172 }
195 goto out;
196 }
197 dout(" decrypted %d bytes\n", dlen);
198 dend = dbuf + dlen;
199 dp = dbuf;
200 173
201 tkt_struct_v = ceph_decode_8(&dp); 174 /* blob for me */
202 if (tkt_struct_v != 1) 175 dlen = ceph_x_decrypt(secret, p, end, &dbuf, 0);
203 goto bad; 176 if (dlen <= 0) {
177 ret = dlen;
178 goto out;
179 }
180 dout(" decrypted %d bytes\n", dlen);
181 dp = dbuf;
182 dend = dp + dlen;
204 183
205 memcpy(&old_key, &th->session_key, sizeof(old_key)); 184 tkt_struct_v = ceph_decode_8(&dp);
206 ret = ceph_crypto_key_decode(&new_session_key, &dp, dend); 185 if (tkt_struct_v != 1)
207 if (ret) 186 goto bad;
208 goto out;
209 187
210 ceph_decode_copy(&dp, &new_validity, sizeof(new_validity)); 188 memcpy(&old_key, &th->session_key, sizeof(old_key));
211 ceph_decode_timespec(&validity, &new_validity); 189 ret = ceph_crypto_key_decode(&new_session_key, &dp, dend);
212 new_expires = get_seconds() + validity.tv_sec; 190 if (ret)
213 new_renew_after = new_expires - (validity.tv_sec / 4); 191 goto out;
214 dout(" expires=%lu renew_after=%lu\n", new_expires,
215 new_renew_after);
216 192
217 /* ticket blob for service */ 193 ceph_decode_copy(&dp, &new_validity, sizeof(new_validity));
218 ceph_decode_8_safe(&p, end, is_enc, bad); 194 ceph_decode_timespec(&validity, &new_validity);
219 tp = ticket_buf; 195 new_expires = get_seconds() + validity.tv_sec;
220 if (is_enc) { 196 new_renew_after = new_expires - (validity.tv_sec / 4);
221 /* encrypted */ 197 dout(" expires=%lu renew_after=%lu\n", new_expires,
222 dout(" encrypted ticket\n"); 198 new_renew_after);
223 dlen = ceph_x_decrypt(&old_key, &p, end, ticket_buf, 199
224 TEMP_TICKET_BUF_LEN); 200 /* ticket blob for service */
225 if (dlen < 0) { 201 ceph_decode_8_safe(p, end, is_enc, bad);
226 ret = dlen; 202 if (is_enc) {
227 goto out; 203 /* encrypted */
228 } 204 dout(" encrypted ticket\n");
229 dlen = ceph_decode_32(&tp); 205 dlen = ceph_x_decrypt(&old_key, p, end, &ticket_buf, 0);
230 } else { 206 if (dlen < 0) {
231 /* unencrypted */ 207 ret = dlen;
232 ceph_decode_32_safe(&p, end, dlen, bad); 208 goto out;
233 ceph_decode_need(&p, end, dlen, bad);
234 ceph_decode_copy(&p, ticket_buf, dlen);
235 } 209 }
236 tpend = tp + dlen; 210 tp = ticket_buf;
237 dout(" ticket blob is %d bytes\n", dlen); 211 dlen = ceph_decode_32(&tp);
238 ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); 212 } else {
239 blob_struct_v = ceph_decode_8(&tp); 213 /* unencrypted */
240 new_secret_id = ceph_decode_64(&tp); 214 ceph_decode_32_safe(p, end, dlen, bad);
241 ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend); 215 ticket_buf = kmalloc(dlen, GFP_NOFS);
242 if (ret) 216 if (!ticket_buf) {
217 ret = -ENOMEM;
243 goto out; 218 goto out;
244 219 }
245 /* all is well, update our ticket */ 220 tp = ticket_buf;
246 ceph_crypto_key_destroy(&th->session_key); 221 ceph_decode_need(p, end, dlen, bad);
247 if (th->ticket_blob) 222 ceph_decode_copy(p, ticket_buf, dlen);
248 ceph_buffer_put(th->ticket_blob);
249 th->session_key = new_session_key;
250 th->ticket_blob = new_ticket_blob;
251 th->validity = new_validity;
252 th->secret_id = new_secret_id;
253 th->expires = new_expires;
254 th->renew_after = new_renew_after;
255 dout(" got ticket service %d (%s) secret_id %lld len %d\n",
256 type, ceph_entity_type_name(type), th->secret_id,
257 (int)th->ticket_blob->vec.iov_len);
258 xi->have_keys |= th->service;
259 } 223 }
224 tpend = tp + dlen;
225 dout(" ticket blob is %d bytes\n", dlen);
226 ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad);
227 blob_struct_v = ceph_decode_8(&tp);
228 new_secret_id = ceph_decode_64(&tp);
229 ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend);
230 if (ret)
231 goto out;
232
233 /* all is well, update our ticket */
234 ceph_crypto_key_destroy(&th->session_key);
235 if (th->ticket_blob)
236 ceph_buffer_put(th->ticket_blob);
237 th->session_key = new_session_key;
238 th->ticket_blob = new_ticket_blob;
239 th->validity = new_validity;
240 th->secret_id = new_secret_id;
241 th->expires = new_expires;
242 th->renew_after = new_renew_after;
243 dout(" got ticket service %d (%s) secret_id %lld len %d\n",
244 type, ceph_entity_type_name(type), th->secret_id,
245 (int)th->ticket_blob->vec.iov_len);
246 xi->have_keys |= th->service;
260 247
261 ret = 0;
262out: 248out:
263 kfree(ticket_buf); 249 kfree(ticket_buf);
264out_dbuf:
265 kfree(dbuf); 250 kfree(dbuf);
266 return ret; 251 return ret;
267 252
@@ -270,6 +255,34 @@ bad:
270 goto out; 255 goto out;
271} 256}
272 257
258static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
259 struct ceph_crypto_key *secret,
260 void *buf, void *end)
261{
262 void *p = buf;
263 u8 reply_struct_v;
264 u32 num;
265 int ret;
266
267 ceph_decode_8_safe(&p, end, reply_struct_v, bad);
268 if (reply_struct_v != 1)
269 return -EINVAL;
270
271 ceph_decode_32_safe(&p, end, num, bad);
272 dout("%d tickets\n", num);
273
274 while (num--) {
275 ret = process_one_ticket(ac, secret, &p, end);
276 if (ret)
277 return ret;
278 }
279
280 return 0;
281
282bad:
283 return -EINVAL;
284}
285
273static int ceph_x_build_authorizer(struct ceph_auth_client *ac, 286static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
274 struct ceph_x_ticket_handler *th, 287 struct ceph_x_ticket_handler *th,
275 struct ceph_x_authorizer *au) 288 struct ceph_x_authorizer *au)
@@ -583,13 +596,14 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
583 struct ceph_x_ticket_handler *th; 596 struct ceph_x_ticket_handler *th;
584 int ret = 0; 597 int ret = 0;
585 struct ceph_x_authorize_reply reply; 598 struct ceph_x_authorize_reply reply;
599 void *preply = &reply;
586 void *p = au->reply_buf; 600 void *p = au->reply_buf;
587 void *end = p + sizeof(au->reply_buf); 601 void *end = p + sizeof(au->reply_buf);
588 602
589 th = get_ticket_handler(ac, au->service); 603 th = get_ticket_handler(ac, au->service);
590 if (IS_ERR(th)) 604 if (IS_ERR(th))
591 return PTR_ERR(th); 605 return PTR_ERR(th);
592 ret = ceph_x_decrypt(&th->session_key, &p, end, &reply, sizeof(reply)); 606 ret = ceph_x_decrypt(&th->session_key, &p, end, &preply, sizeof(reply));
593 if (ret < 0) 607 if (ret < 0)
594 return ret; 608 return ret;
595 if (ret != sizeof(reply)) 609 if (ret != sizeof(reply))
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 1948d592aa54..b2f571dd933d 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -174,6 +174,7 @@ static struct lock_class_key socket_class;
174#define SKIP_BUF_SIZE 1024 174#define SKIP_BUF_SIZE 1024
175 175
176static void queue_con(struct ceph_connection *con); 176static void queue_con(struct ceph_connection *con);
177static void cancel_con(struct ceph_connection *con);
177static void con_work(struct work_struct *); 178static void con_work(struct work_struct *);
178static void con_fault(struct ceph_connection *con); 179static void con_fault(struct ceph_connection *con);
179 180
@@ -680,7 +681,7 @@ void ceph_con_close(struct ceph_connection *con)
680 681
681 reset_connection(con); 682 reset_connection(con);
682 con->peer_global_seq = 0; 683 con->peer_global_seq = 0;
683 cancel_delayed_work(&con->work); 684 cancel_con(con);
684 con_close_socket(con); 685 con_close_socket(con);
685 mutex_unlock(&con->mutex); 686 mutex_unlock(&con->mutex);
686} 687}
@@ -900,7 +901,7 @@ static void ceph_msg_data_pages_cursor_init(struct ceph_msg_data_cursor *cursor,
900 BUG_ON(page_count > (int)USHRT_MAX); 901 BUG_ON(page_count > (int)USHRT_MAX);
901 cursor->page_count = (unsigned short)page_count; 902 cursor->page_count = (unsigned short)page_count;
902 BUG_ON(length > SIZE_MAX - cursor->page_offset); 903 BUG_ON(length > SIZE_MAX - cursor->page_offset);
903 cursor->last_piece = (size_t)cursor->page_offset + length <= PAGE_SIZE; 904 cursor->last_piece = cursor->page_offset + cursor->resid <= PAGE_SIZE;
904} 905}
905 906
906static struct page * 907static struct page *
@@ -2667,19 +2668,16 @@ static int queue_con_delay(struct ceph_connection *con, unsigned long delay)
2667{ 2668{
2668 if (!con->ops->get(con)) { 2669 if (!con->ops->get(con)) {
2669 dout("%s %p ref count 0\n", __func__, con); 2670 dout("%s %p ref count 0\n", __func__, con);
2670
2671 return -ENOENT; 2671 return -ENOENT;
2672 } 2672 }
2673 2673
2674 if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) { 2674 if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) {
2675 dout("%s %p - already queued\n", __func__, con); 2675 dout("%s %p - already queued\n", __func__, con);
2676 con->ops->put(con); 2676 con->ops->put(con);
2677
2678 return -EBUSY; 2677 return -EBUSY;
2679 } 2678 }
2680 2679
2681 dout("%s %p %lu\n", __func__, con, delay); 2680 dout("%s %p %lu\n", __func__, con, delay);
2682
2683 return 0; 2681 return 0;
2684} 2682}
2685 2683
@@ -2688,6 +2686,14 @@ static void queue_con(struct ceph_connection *con)
2688 (void) queue_con_delay(con, 0); 2686 (void) queue_con_delay(con, 0);
2689} 2687}
2690 2688
2689static void cancel_con(struct ceph_connection *con)
2690{
2691 if (cancel_delayed_work(&con->work)) {
2692 dout("%s %p\n", __func__, con);
2693 con->ops->put(con);
2694 }
2695}
2696
2691static bool con_sock_closed(struct ceph_connection *con) 2697static bool con_sock_closed(struct ceph_connection *con)
2692{ 2698{
2693 if (!con_flag_test_and_clear(con, CON_FLAG_SOCK_CLOSED)) 2699 if (!con_flag_test_and_clear(con, CON_FLAG_SOCK_CLOSED))
@@ -3269,24 +3275,21 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
3269/* 3275/*
3270 * Free a generically kmalloc'd message. 3276 * Free a generically kmalloc'd message.
3271 */ 3277 */
3272void ceph_msg_kfree(struct ceph_msg *m) 3278static void ceph_msg_free(struct ceph_msg *m)
3273{ 3279{
3274 dout("msg_kfree %p\n", m); 3280 dout("%s %p\n", __func__, m);
3275 ceph_kvfree(m->front.iov_base); 3281 ceph_kvfree(m->front.iov_base);
3276 kmem_cache_free(ceph_msg_cache, m); 3282 kmem_cache_free(ceph_msg_cache, m);
3277} 3283}
3278 3284
3279/* 3285static void ceph_msg_release(struct kref *kref)
3280 * Drop a msg ref. Destroy as needed.
3281 */
3282void ceph_msg_last_put(struct kref *kref)
3283{ 3286{
3284 struct ceph_msg *m = container_of(kref, struct ceph_msg, kref); 3287 struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
3285 LIST_HEAD(data); 3288 LIST_HEAD(data);
3286 struct list_head *links; 3289 struct list_head *links;
3287 struct list_head *next; 3290 struct list_head *next;
3288 3291
3289 dout("ceph_msg_put last one on %p\n", m); 3292 dout("%s %p\n", __func__, m);
3290 WARN_ON(!list_empty(&m->list_head)); 3293 WARN_ON(!list_empty(&m->list_head));
3291 3294
3292 /* drop middle, data, if any */ 3295 /* drop middle, data, if any */
@@ -3308,9 +3311,25 @@ void ceph_msg_last_put(struct kref *kref)
3308 if (m->pool) 3311 if (m->pool)
3309 ceph_msgpool_put(m->pool, m); 3312 ceph_msgpool_put(m->pool, m);
3310 else 3313 else
3311 ceph_msg_kfree(m); 3314 ceph_msg_free(m);
3315}
3316
3317struct ceph_msg *ceph_msg_get(struct ceph_msg *msg)
3318{
3319 dout("%s %p (was %d)\n", __func__, msg,
3320 atomic_read(&msg->kref.refcount));
3321 kref_get(&msg->kref);
3322 return msg;
3323}
3324EXPORT_SYMBOL(ceph_msg_get);
3325
3326void ceph_msg_put(struct ceph_msg *msg)
3327{
3328 dout("%s %p (was %d)\n", __func__, msg,
3329 atomic_read(&msg->kref.refcount));
3330 kref_put(&msg->kref, ceph_msg_release);
3312} 3331}
3313EXPORT_SYMBOL(ceph_msg_last_put); 3332EXPORT_SYMBOL(ceph_msg_put);
3314 3333
3315void ceph_msg_dump(struct ceph_msg *msg) 3334void ceph_msg_dump(struct ceph_msg *msg)
3316{ 3335{
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 067d3af2eaf6..61fcfc304f68 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1181,7 +1181,15 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
1181 if (!m) { 1181 if (!m) {
1182 pr_info("alloc_msg unknown type %d\n", type); 1182 pr_info("alloc_msg unknown type %d\n", type);
1183 *skip = 1; 1183 *skip = 1;
1184 } else if (front_len > m->front_alloc_len) {
1185 pr_warning("mon_alloc_msg front %d > prealloc %d (%u#%llu)\n",
1186 front_len, m->front_alloc_len,
1187 (unsigned int)con->peer_name.type,
1188 le64_to_cpu(con->peer_name.num));
1189 ceph_msg_put(m);
1190 m = ceph_msg_new(type, front_len, GFP_NOFS, false);
1184 } 1191 }
1192
1185 return m; 1193 return m;
1186} 1194}
1187 1195
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 05be0c181695..30f6faf3584f 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -297,12 +297,21 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
297/* 297/*
298 * requests 298 * requests
299 */ 299 */
300void ceph_osdc_release_request(struct kref *kref) 300static void ceph_osdc_release_request(struct kref *kref)
301{ 301{
302 struct ceph_osd_request *req; 302 struct ceph_osd_request *req = container_of(kref,
303 struct ceph_osd_request, r_kref);
303 unsigned int which; 304 unsigned int which;
304 305
305 req = container_of(kref, struct ceph_osd_request, r_kref); 306 dout("%s %p (r_request %p r_reply %p)\n", __func__, req,
307 req->r_request, req->r_reply);
308 WARN_ON(!RB_EMPTY_NODE(&req->r_node));
309 WARN_ON(!list_empty(&req->r_req_lru_item));
310 WARN_ON(!list_empty(&req->r_osd_item));
311 WARN_ON(!list_empty(&req->r_linger_item));
312 WARN_ON(!list_empty(&req->r_linger_osd_item));
313 WARN_ON(req->r_osd);
314
306 if (req->r_request) 315 if (req->r_request)
307 ceph_msg_put(req->r_request); 316 ceph_msg_put(req->r_request);
308 if (req->r_reply) { 317 if (req->r_reply) {
@@ -320,7 +329,22 @@ void ceph_osdc_release_request(struct kref *kref)
320 kmem_cache_free(ceph_osd_request_cache, req); 329 kmem_cache_free(ceph_osd_request_cache, req);
321 330
322} 331}
323EXPORT_SYMBOL(ceph_osdc_release_request); 332
333void ceph_osdc_get_request(struct ceph_osd_request *req)
334{
335 dout("%s %p (was %d)\n", __func__, req,
336 atomic_read(&req->r_kref.refcount));
337 kref_get(&req->r_kref);
338}
339EXPORT_SYMBOL(ceph_osdc_get_request);
340
341void ceph_osdc_put_request(struct ceph_osd_request *req)
342{
343 dout("%s %p (was %d)\n", __func__, req,
344 atomic_read(&req->r_kref.refcount));
345 kref_put(&req->r_kref, ceph_osdc_release_request);
346}
347EXPORT_SYMBOL(ceph_osdc_put_request);
324 348
325struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, 349struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
326 struct ceph_snap_context *snapc, 350 struct ceph_snap_context *snapc,
@@ -364,7 +388,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
364 RB_CLEAR_NODE(&req->r_node); 388 RB_CLEAR_NODE(&req->r_node);
365 INIT_LIST_HEAD(&req->r_unsafe_item); 389 INIT_LIST_HEAD(&req->r_unsafe_item);
366 INIT_LIST_HEAD(&req->r_linger_item); 390 INIT_LIST_HEAD(&req->r_linger_item);
367 INIT_LIST_HEAD(&req->r_linger_osd); 391 INIT_LIST_HEAD(&req->r_linger_osd_item);
368 INIT_LIST_HEAD(&req->r_req_lru_item); 392 INIT_LIST_HEAD(&req->r_req_lru_item);
369 INIT_LIST_HEAD(&req->r_osd_item); 393 INIT_LIST_HEAD(&req->r_osd_item);
370 394
@@ -916,7 +940,7 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
916 * list at the end to keep things in tid order. 940 * list at the end to keep things in tid order.
917 */ 941 */
918 list_for_each_entry_safe(req, nreq, &osd->o_linger_requests, 942 list_for_each_entry_safe(req, nreq, &osd->o_linger_requests,
919 r_linger_osd) { 943 r_linger_osd_item) {
920 /* 944 /*
921 * reregister request prior to unregistering linger so 945 * reregister request prior to unregistering linger so
922 * that r_osd is preserved. 946 * that r_osd is preserved.
@@ -1008,6 +1032,8 @@ static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
1008{ 1032{
1009 dout("__remove_osd %p\n", osd); 1033 dout("__remove_osd %p\n", osd);
1010 BUG_ON(!list_empty(&osd->o_requests)); 1034 BUG_ON(!list_empty(&osd->o_requests));
1035 BUG_ON(!list_empty(&osd->o_linger_requests));
1036
1011 rb_erase(&osd->o_node, &osdc->osds); 1037 rb_erase(&osd->o_node, &osdc->osds);
1012 list_del_init(&osd->o_osd_lru); 1038 list_del_init(&osd->o_osd_lru);
1013 ceph_con_close(&osd->o_con); 1039 ceph_con_close(&osd->o_con);
@@ -1029,12 +1055,23 @@ static void remove_all_osds(struct ceph_osd_client *osdc)
1029static void __move_osd_to_lru(struct ceph_osd_client *osdc, 1055static void __move_osd_to_lru(struct ceph_osd_client *osdc,
1030 struct ceph_osd *osd) 1056 struct ceph_osd *osd)
1031{ 1057{
1032 dout("__move_osd_to_lru %p\n", osd); 1058 dout("%s %p\n", __func__, osd);
1033 BUG_ON(!list_empty(&osd->o_osd_lru)); 1059 BUG_ON(!list_empty(&osd->o_osd_lru));
1060
1034 list_add_tail(&osd->o_osd_lru, &osdc->osd_lru); 1061 list_add_tail(&osd->o_osd_lru, &osdc->osd_lru);
1035 osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ; 1062 osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ;
1036} 1063}
1037 1064
1065static void maybe_move_osd_to_lru(struct ceph_osd_client *osdc,
1066 struct ceph_osd *osd)
1067{
1068 dout("%s %p\n", __func__, osd);
1069
1070 if (list_empty(&osd->o_requests) &&
1071 list_empty(&osd->o_linger_requests))
1072 __move_osd_to_lru(osdc, osd);
1073}
1074
1038static void __remove_osd_from_lru(struct ceph_osd *osd) 1075static void __remove_osd_from_lru(struct ceph_osd *osd)
1039{ 1076{
1040 dout("__remove_osd_from_lru %p\n", osd); 1077 dout("__remove_osd_from_lru %p\n", osd);
@@ -1175,6 +1212,7 @@ static void __unregister_request(struct ceph_osd_client *osdc,
1175 1212
1176 dout("__unregister_request %p tid %lld\n", req, req->r_tid); 1213 dout("__unregister_request %p tid %lld\n", req, req->r_tid);
1177 rb_erase(&req->r_node, &osdc->requests); 1214 rb_erase(&req->r_node, &osdc->requests);
1215 RB_CLEAR_NODE(&req->r_node);
1178 osdc->num_requests--; 1216 osdc->num_requests--;
1179 1217
1180 if (req->r_osd) { 1218 if (req->r_osd) {
@@ -1182,12 +1220,8 @@ static void __unregister_request(struct ceph_osd_client *osdc,
1182 ceph_msg_revoke(req->r_request); 1220 ceph_msg_revoke(req->r_request);
1183 1221
1184 list_del_init(&req->r_osd_item); 1222 list_del_init(&req->r_osd_item);
1185 if (list_empty(&req->r_osd->o_requests) && 1223 maybe_move_osd_to_lru(osdc, req->r_osd);
1186 list_empty(&req->r_osd->o_linger_requests)) { 1224 if (list_empty(&req->r_linger_osd_item))
1187 dout("moving osd to %p lru\n", req->r_osd);
1188 __move_osd_to_lru(osdc, req->r_osd);
1189 }
1190 if (list_empty(&req->r_linger_item))
1191 req->r_osd = NULL; 1225 req->r_osd = NULL;
1192 } 1226 }
1193 1227
@@ -1214,45 +1248,39 @@ static void __cancel_request(struct ceph_osd_request *req)
1214static void __register_linger_request(struct ceph_osd_client *osdc, 1248static void __register_linger_request(struct ceph_osd_client *osdc,
1215 struct ceph_osd_request *req) 1249 struct ceph_osd_request *req)
1216{ 1250{
1217 dout("__register_linger_request %p\n", req); 1251 dout("%s %p tid %llu\n", __func__, req, req->r_tid);
1252 WARN_ON(!req->r_linger);
1253
1218 ceph_osdc_get_request(req); 1254 ceph_osdc_get_request(req);
1219 list_add_tail(&req->r_linger_item, &osdc->req_linger); 1255 list_add_tail(&req->r_linger_item, &osdc->req_linger);
1220 if (req->r_osd) 1256 if (req->r_osd)
1221 list_add_tail(&req->r_linger_osd, 1257 list_add_tail(&req->r_linger_osd_item,
1222 &req->r_osd->o_linger_requests); 1258 &req->r_osd->o_linger_requests);
1223} 1259}
1224 1260
1225static void __unregister_linger_request(struct ceph_osd_client *osdc, 1261static void __unregister_linger_request(struct ceph_osd_client *osdc,
1226 struct ceph_osd_request *req) 1262 struct ceph_osd_request *req)
1227{ 1263{
1228 dout("__unregister_linger_request %p\n", req); 1264 WARN_ON(!req->r_linger);
1265
1266 if (list_empty(&req->r_linger_item)) {
1267 dout("%s %p tid %llu not registered\n", __func__, req,
1268 req->r_tid);
1269 return;
1270 }
1271
1272 dout("%s %p tid %llu\n", __func__, req, req->r_tid);
1229 list_del_init(&req->r_linger_item); 1273 list_del_init(&req->r_linger_item);
1230 if (req->r_osd) {
1231 list_del_init(&req->r_linger_osd);
1232 1274
1233 if (list_empty(&req->r_osd->o_requests) && 1275 if (req->r_osd) {
1234 list_empty(&req->r_osd->o_linger_requests)) { 1276 list_del_init(&req->r_linger_osd_item);
1235 dout("moving osd to %p lru\n", req->r_osd); 1277 maybe_move_osd_to_lru(osdc, req->r_osd);
1236 __move_osd_to_lru(osdc, req->r_osd);
1237 }
1238 if (list_empty(&req->r_osd_item)) 1278 if (list_empty(&req->r_osd_item))
1239 req->r_osd = NULL; 1279 req->r_osd = NULL;
1240 } 1280 }
1241 ceph_osdc_put_request(req); 1281 ceph_osdc_put_request(req);
1242} 1282}
1243 1283
1244void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
1245 struct ceph_osd_request *req)
1246{
1247 mutex_lock(&osdc->request_mutex);
1248 if (req->r_linger) {
1249 req->r_linger = 0;
1250 __unregister_linger_request(osdc, req);
1251 }
1252 mutex_unlock(&osdc->request_mutex);
1253}
1254EXPORT_SYMBOL(ceph_osdc_unregister_linger_request);
1255
1256void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, 1284void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
1257 struct ceph_osd_request *req) 1285 struct ceph_osd_request *req)
1258{ 1286{
@@ -2430,6 +2458,25 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
2430EXPORT_SYMBOL(ceph_osdc_start_request); 2458EXPORT_SYMBOL(ceph_osdc_start_request);
2431 2459
2432/* 2460/*
2461 * Unregister a registered request. The request is not completed (i.e.
2462 * no callbacks or wakeups) - higher layers are supposed to know what
2463 * they are canceling.
2464 */
2465void ceph_osdc_cancel_request(struct ceph_osd_request *req)
2466{
2467 struct ceph_osd_client *osdc = req->r_osdc;
2468
2469 mutex_lock(&osdc->request_mutex);
2470 if (req->r_linger)
2471 __unregister_linger_request(osdc, req);
2472 __unregister_request(osdc, req);
2473 mutex_unlock(&osdc->request_mutex);
2474
2475 dout("%s %p tid %llu canceled\n", __func__, req, req->r_tid);
2476}
2477EXPORT_SYMBOL(ceph_osdc_cancel_request);
2478
2479/*
2433 * wait for a request to complete 2480 * wait for a request to complete
2434 */ 2481 */
2435int ceph_osdc_wait_request(struct ceph_osd_client *osdc, 2482int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
@@ -2437,18 +2484,18 @@ int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
2437{ 2484{
2438 int rc; 2485 int rc;
2439 2486
2487 dout("%s %p tid %llu\n", __func__, req, req->r_tid);
2488
2440 rc = wait_for_completion_interruptible(&req->r_completion); 2489 rc = wait_for_completion_interruptible(&req->r_completion);
2441 if (rc < 0) { 2490 if (rc < 0) {
2442 mutex_lock(&osdc->request_mutex); 2491 dout("%s %p tid %llu interrupted\n", __func__, req, req->r_tid);
2443 __cancel_request(req); 2492 ceph_osdc_cancel_request(req);
2444 __unregister_request(osdc, req);
2445 mutex_unlock(&osdc->request_mutex);
2446 complete_request(req); 2493 complete_request(req);
2447 dout("wait_request tid %llu canceled/timed out\n", req->r_tid);
2448 return rc; 2494 return rc;
2449 } 2495 }
2450 2496
2451 dout("wait_request tid %llu result %d\n", req->r_tid, req->r_result); 2497 dout("%s %p tid %llu result %d\n", __func__, req, req->r_tid,
2498 req->r_result);
2452 return req->r_result; 2499 return req->r_result;
2453} 2500}
2454EXPORT_SYMBOL(ceph_osdc_wait_request); 2501EXPORT_SYMBOL(ceph_osdc_wait_request);