aboutsummaryrefslogtreecommitdiffstats
path: root/net/ceph
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2014-08-13 19:43:29 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-08-13 19:43:29 -0400
commit: 8d2d441ac4af223eae466c3c31ff737cc31a1411 (patch)
tree: d14b0f72e80f94c1575c281bd21d43a86de0a92d /net/ceph
parent: 89838b80bbbf9774cf010905851db7913c9331f0 (diff)
parent: 5f740d7e1531099b888410e6bab13f68da9b1a4d (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph updates from Sage Weil:
 "There is a lot of refactoring and hardening of the libceph and rbd code here from Ilya that fix various smaller bugs, and a few more important fixes with clone overlap. The main fix is a critical change to the request_fn handling to not sleep that was exposed by the recent mutex changes (which will also go to the 3.16 stable series).

  Yan Zheng has several fixes in here for CephFS fixing ACL handling, time stamps, and request resends when the MDS restarts.

  Finally, there are a few cleanups from Himangi Saraogi based on Coccinelle"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (39 commits)
  libceph: set last_piece in ceph_msg_data_pages_cursor_init() correctly
  rbd: remove extra newlines from rbd_warn() messages
  rbd: allocate img_request with GFP_NOIO instead GFP_ATOMIC
  rbd: rework rbd_request_fn()
  ceph: fix kick_requests()
  ceph: fix append mode write
  ceph: fix sizeof(struct tYpO *) typo
  ceph: remove redundant memset(0)
  rbd: take snap_id into account when reading in parent info
  rbd: do not read in parent info before snap context
  rbd: update mapping size only on refresh
  rbd: harden rbd_dev_refresh() and callers a bit
  rbd: split rbd_dev_spec_update() into two functions
  rbd: remove unnecessary asserts in rbd_dev_image_probe()
  rbd: introduce rbd_dev_header_info()
  rbd: show the entire chain of parent images
  ceph: replace comma with a semicolon
  rbd: use rbd_segment_name_free() instead of kfree()
  ceph: check zero length in ceph_sync_read()
  ceph: reset r_resend_mds after receiving -ESTALE
  ...
Diffstat (limited to 'net/ceph')
-rw-r--r-- net/ceph/messenger.c 47
-rw-r--r-- net/ceph/osd_client.c 129
2 files changed, 121 insertions, 55 deletions
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 1948d592aa54..b2f571dd933d 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -174,6 +174,7 @@ static struct lock_class_key socket_class;
174#define SKIP_BUF_SIZE 1024 174#define SKIP_BUF_SIZE 1024
175 175
176static void queue_con(struct ceph_connection *con); 176static void queue_con(struct ceph_connection *con);
177static void cancel_con(struct ceph_connection *con);
177static void con_work(struct work_struct *); 178static void con_work(struct work_struct *);
178static void con_fault(struct ceph_connection *con); 179static void con_fault(struct ceph_connection *con);
179 180
@@ -680,7 +681,7 @@ void ceph_con_close(struct ceph_connection *con)
680 681
681 reset_connection(con); 682 reset_connection(con);
682 con->peer_global_seq = 0; 683 con->peer_global_seq = 0;
683 cancel_delayed_work(&con->work); 684 cancel_con(con);
684 con_close_socket(con); 685 con_close_socket(con);
685 mutex_unlock(&con->mutex); 686 mutex_unlock(&con->mutex);
686} 687}
@@ -900,7 +901,7 @@ static void ceph_msg_data_pages_cursor_init(struct ceph_msg_data_cursor *cursor,
900 BUG_ON(page_count > (int)USHRT_MAX); 901 BUG_ON(page_count > (int)USHRT_MAX);
901 cursor->page_count = (unsigned short)page_count; 902 cursor->page_count = (unsigned short)page_count;
902 BUG_ON(length > SIZE_MAX - cursor->page_offset); 903 BUG_ON(length > SIZE_MAX - cursor->page_offset);
903 cursor->last_piece = (size_t)cursor->page_offset + length <= PAGE_SIZE; 904 cursor->last_piece = cursor->page_offset + cursor->resid <= PAGE_SIZE;
904} 905}
905 906
906static struct page * 907static struct page *
@@ -2667,19 +2668,16 @@ static int queue_con_delay(struct ceph_connection *con, unsigned long delay)
2667{ 2668{
2668 if (!con->ops->get(con)) { 2669 if (!con->ops->get(con)) {
2669 dout("%s %p ref count 0\n", __func__, con); 2670 dout("%s %p ref count 0\n", __func__, con);
2670
2671 return -ENOENT; 2671 return -ENOENT;
2672 } 2672 }
2673 2673
2674 if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) { 2674 if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) {
2675 dout("%s %p - already queued\n", __func__, con); 2675 dout("%s %p - already queued\n", __func__, con);
2676 con->ops->put(con); 2676 con->ops->put(con);
2677
2678 return -EBUSY; 2677 return -EBUSY;
2679 } 2678 }
2680 2679
2681 dout("%s %p %lu\n", __func__, con, delay); 2680 dout("%s %p %lu\n", __func__, con, delay);
2682
2683 return 0; 2681 return 0;
2684} 2682}
2685 2683
@@ -2688,6 +2686,14 @@ static void queue_con(struct ceph_connection *con)
2688 (void) queue_con_delay(con, 0); 2686 (void) queue_con_delay(con, 0);
2689} 2687}
2690 2688
2689static void cancel_con(struct ceph_connection *con)
2690{
2691 if (cancel_delayed_work(&con->work)) {
2692 dout("%s %p\n", __func__, con);
2693 con->ops->put(con);
2694 }
2695}
2696
2691static bool con_sock_closed(struct ceph_connection *con) 2697static bool con_sock_closed(struct ceph_connection *con)
2692{ 2698{
2693 if (!con_flag_test_and_clear(con, CON_FLAG_SOCK_CLOSED)) 2699 if (!con_flag_test_and_clear(con, CON_FLAG_SOCK_CLOSED))
@@ -3269,24 +3275,21 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
3269/* 3275/*
3270 * Free a generically kmalloc'd message. 3276 * Free a generically kmalloc'd message.
3271 */ 3277 */
3272void ceph_msg_kfree(struct ceph_msg *m) 3278static void ceph_msg_free(struct ceph_msg *m)
3273{ 3279{
3274 dout("msg_kfree %p\n", m); 3280 dout("%s %p\n", __func__, m);
3275 ceph_kvfree(m->front.iov_base); 3281 ceph_kvfree(m->front.iov_base);
3276 kmem_cache_free(ceph_msg_cache, m); 3282 kmem_cache_free(ceph_msg_cache, m);
3277} 3283}
3278 3284
3279/* 3285static void ceph_msg_release(struct kref *kref)
3280 * Drop a msg ref. Destroy as needed.
3281 */
3282void ceph_msg_last_put(struct kref *kref)
3283{ 3286{
3284 struct ceph_msg *m = container_of(kref, struct ceph_msg, kref); 3287 struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
3285 LIST_HEAD(data); 3288 LIST_HEAD(data);
3286 struct list_head *links; 3289 struct list_head *links;
3287 struct list_head *next; 3290 struct list_head *next;
3288 3291
3289 dout("ceph_msg_put last one on %p\n", m); 3292 dout("%s %p\n", __func__, m);
3290 WARN_ON(!list_empty(&m->list_head)); 3293 WARN_ON(!list_empty(&m->list_head));
3291 3294
3292 /* drop middle, data, if any */ 3295 /* drop middle, data, if any */
@@ -3308,9 +3311,25 @@ void ceph_msg_last_put(struct kref *kref)
3308 if (m->pool) 3311 if (m->pool)
3309 ceph_msgpool_put(m->pool, m); 3312 ceph_msgpool_put(m->pool, m);
3310 else 3313 else
3311 ceph_msg_kfree(m); 3314 ceph_msg_free(m);
3315}
3316
3317struct ceph_msg *ceph_msg_get(struct ceph_msg *msg)
3318{
3319 dout("%s %p (was %d)\n", __func__, msg,
3320 atomic_read(&msg->kref.refcount));
3321 kref_get(&msg->kref);
3322 return msg;
3323}
3324EXPORT_SYMBOL(ceph_msg_get);
3325
3326void ceph_msg_put(struct ceph_msg *msg)
3327{
3328 dout("%s %p (was %d)\n", __func__, msg,
3329 atomic_read(&msg->kref.refcount));
3330 kref_put(&msg->kref, ceph_msg_release);
3312} 3331}
3313EXPORT_SYMBOL(ceph_msg_last_put); 3332EXPORT_SYMBOL(ceph_msg_put);
3314 3333
3315void ceph_msg_dump(struct ceph_msg *msg) 3334void ceph_msg_dump(struct ceph_msg *msg)
3316{ 3335{
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 05be0c181695..30f6faf3584f 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -297,12 +297,21 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
297/* 297/*
298 * requests 298 * requests
299 */ 299 */
300void ceph_osdc_release_request(struct kref *kref) 300static void ceph_osdc_release_request(struct kref *kref)
301{ 301{
302 struct ceph_osd_request *req; 302 struct ceph_osd_request *req = container_of(kref,
303 struct ceph_osd_request, r_kref);
303 unsigned int which; 304 unsigned int which;
304 305
305 req = container_of(kref, struct ceph_osd_request, r_kref); 306 dout("%s %p (r_request %p r_reply %p)\n", __func__, req,
307 req->r_request, req->r_reply);
308 WARN_ON(!RB_EMPTY_NODE(&req->r_node));
309 WARN_ON(!list_empty(&req->r_req_lru_item));
310 WARN_ON(!list_empty(&req->r_osd_item));
311 WARN_ON(!list_empty(&req->r_linger_item));
312 WARN_ON(!list_empty(&req->r_linger_osd_item));
313 WARN_ON(req->r_osd);
314
306 if (req->r_request) 315 if (req->r_request)
307 ceph_msg_put(req->r_request); 316 ceph_msg_put(req->r_request);
308 if (req->r_reply) { 317 if (req->r_reply) {
@@ -320,7 +329,22 @@ void ceph_osdc_release_request(struct kref *kref)
320 kmem_cache_free(ceph_osd_request_cache, req); 329 kmem_cache_free(ceph_osd_request_cache, req);
321 330
322} 331}
323EXPORT_SYMBOL(ceph_osdc_release_request); 332
333void ceph_osdc_get_request(struct ceph_osd_request *req)
334{
335 dout("%s %p (was %d)\n", __func__, req,
336 atomic_read(&req->r_kref.refcount));
337 kref_get(&req->r_kref);
338}
339EXPORT_SYMBOL(ceph_osdc_get_request);
340
341void ceph_osdc_put_request(struct ceph_osd_request *req)
342{
343 dout("%s %p (was %d)\n", __func__, req,
344 atomic_read(&req->r_kref.refcount));
345 kref_put(&req->r_kref, ceph_osdc_release_request);
346}
347EXPORT_SYMBOL(ceph_osdc_put_request);
324 348
325struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, 349struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
326 struct ceph_snap_context *snapc, 350 struct ceph_snap_context *snapc,
@@ -364,7 +388,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
364 RB_CLEAR_NODE(&req->r_node); 388 RB_CLEAR_NODE(&req->r_node);
365 INIT_LIST_HEAD(&req->r_unsafe_item); 389 INIT_LIST_HEAD(&req->r_unsafe_item);
366 INIT_LIST_HEAD(&req->r_linger_item); 390 INIT_LIST_HEAD(&req->r_linger_item);
367 INIT_LIST_HEAD(&req->r_linger_osd); 391 INIT_LIST_HEAD(&req->r_linger_osd_item);
368 INIT_LIST_HEAD(&req->r_req_lru_item); 392 INIT_LIST_HEAD(&req->r_req_lru_item);
369 INIT_LIST_HEAD(&req->r_osd_item); 393 INIT_LIST_HEAD(&req->r_osd_item);
370 394
@@ -916,7 +940,7 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
916 * list at the end to keep things in tid order. 940 * list at the end to keep things in tid order.
917 */ 941 */
918 list_for_each_entry_safe(req, nreq, &osd->o_linger_requests, 942 list_for_each_entry_safe(req, nreq, &osd->o_linger_requests,
919 r_linger_osd) { 943 r_linger_osd_item) {
920 /* 944 /*
921 * reregister request prior to unregistering linger so 945 * reregister request prior to unregistering linger so
922 * that r_osd is preserved. 946 * that r_osd is preserved.
@@ -1008,6 +1032,8 @@ static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
1008{ 1032{
1009 dout("__remove_osd %p\n", osd); 1033 dout("__remove_osd %p\n", osd);
1010 BUG_ON(!list_empty(&osd->o_requests)); 1034 BUG_ON(!list_empty(&osd->o_requests));
1035 BUG_ON(!list_empty(&osd->o_linger_requests));
1036
1011 rb_erase(&osd->o_node, &osdc->osds); 1037 rb_erase(&osd->o_node, &osdc->osds);
1012 list_del_init(&osd->o_osd_lru); 1038 list_del_init(&osd->o_osd_lru);
1013 ceph_con_close(&osd->o_con); 1039 ceph_con_close(&osd->o_con);
@@ -1029,12 +1055,23 @@ static void remove_all_osds(struct ceph_osd_client *osdc)
1029static void __move_osd_to_lru(struct ceph_osd_client *osdc, 1055static void __move_osd_to_lru(struct ceph_osd_client *osdc,
1030 struct ceph_osd *osd) 1056 struct ceph_osd *osd)
1031{ 1057{
1032 dout("__move_osd_to_lru %p\n", osd); 1058 dout("%s %p\n", __func__, osd);
1033 BUG_ON(!list_empty(&osd->o_osd_lru)); 1059 BUG_ON(!list_empty(&osd->o_osd_lru));
1060
1034 list_add_tail(&osd->o_osd_lru, &osdc->osd_lru); 1061 list_add_tail(&osd->o_osd_lru, &osdc->osd_lru);
1035 osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ; 1062 osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ;
1036} 1063}
1037 1064
1065static void maybe_move_osd_to_lru(struct ceph_osd_client *osdc,
1066 struct ceph_osd *osd)
1067{
1068 dout("%s %p\n", __func__, osd);
1069
1070 if (list_empty(&osd->o_requests) &&
1071 list_empty(&osd->o_linger_requests))
1072 __move_osd_to_lru(osdc, osd);
1073}
1074
1038static void __remove_osd_from_lru(struct ceph_osd *osd) 1075static void __remove_osd_from_lru(struct ceph_osd *osd)
1039{ 1076{
1040 dout("__remove_osd_from_lru %p\n", osd); 1077 dout("__remove_osd_from_lru %p\n", osd);
@@ -1175,6 +1212,7 @@ static void __unregister_request(struct ceph_osd_client *osdc,
1175 1212
1176 dout("__unregister_request %p tid %lld\n", req, req->r_tid); 1213 dout("__unregister_request %p tid %lld\n", req, req->r_tid);
1177 rb_erase(&req->r_node, &osdc->requests); 1214 rb_erase(&req->r_node, &osdc->requests);
1215 RB_CLEAR_NODE(&req->r_node);
1178 osdc->num_requests--; 1216 osdc->num_requests--;
1179 1217
1180 if (req->r_osd) { 1218 if (req->r_osd) {
@@ -1182,12 +1220,8 @@ static void __unregister_request(struct ceph_osd_client *osdc,
1182 ceph_msg_revoke(req->r_request); 1220 ceph_msg_revoke(req->r_request);
1183 1221
1184 list_del_init(&req->r_osd_item); 1222 list_del_init(&req->r_osd_item);
1185 if (list_empty(&req->r_osd->o_requests) && 1223 maybe_move_osd_to_lru(osdc, req->r_osd);
1186 list_empty(&req->r_osd->o_linger_requests)) { 1224 if (list_empty(&req->r_linger_osd_item))
1187 dout("moving osd to %p lru\n", req->r_osd);
1188 __move_osd_to_lru(osdc, req->r_osd);
1189 }
1190 if (list_empty(&req->r_linger_item))
1191 req->r_osd = NULL; 1225 req->r_osd = NULL;
1192 } 1226 }
1193 1227
@@ -1214,45 +1248,39 @@ static void __cancel_request(struct ceph_osd_request *req)
1214static void __register_linger_request(struct ceph_osd_client *osdc, 1248static void __register_linger_request(struct ceph_osd_client *osdc,
1215 struct ceph_osd_request *req) 1249 struct ceph_osd_request *req)
1216{ 1250{
1217 dout("__register_linger_request %p\n", req); 1251 dout("%s %p tid %llu\n", __func__, req, req->r_tid);
1252 WARN_ON(!req->r_linger);
1253
1218 ceph_osdc_get_request(req); 1254 ceph_osdc_get_request(req);
1219 list_add_tail(&req->r_linger_item, &osdc->req_linger); 1255 list_add_tail(&req->r_linger_item, &osdc->req_linger);
1220 if (req->r_osd) 1256 if (req->r_osd)
1221 list_add_tail(&req->r_linger_osd, 1257 list_add_tail(&req->r_linger_osd_item,
1222 &req->r_osd->o_linger_requests); 1258 &req->r_osd->o_linger_requests);
1223} 1259}
1224 1260
1225static void __unregister_linger_request(struct ceph_osd_client *osdc, 1261static void __unregister_linger_request(struct ceph_osd_client *osdc,
1226 struct ceph_osd_request *req) 1262 struct ceph_osd_request *req)
1227{ 1263{
1228 dout("__unregister_linger_request %p\n", req); 1264 WARN_ON(!req->r_linger);
1265
1266 if (list_empty(&req->r_linger_item)) {
1267 dout("%s %p tid %llu not registered\n", __func__, req,
1268 req->r_tid);
1269 return;
1270 }
1271
1272 dout("%s %p tid %llu\n", __func__, req, req->r_tid);
1229 list_del_init(&req->r_linger_item); 1273 list_del_init(&req->r_linger_item);
1230 if (req->r_osd) {
1231 list_del_init(&req->r_linger_osd);
1232 1274
1233 if (list_empty(&req->r_osd->o_requests) && 1275 if (req->r_osd) {
1234 list_empty(&req->r_osd->o_linger_requests)) { 1276 list_del_init(&req->r_linger_osd_item);
1235 dout("moving osd to %p lru\n", req->r_osd); 1277 maybe_move_osd_to_lru(osdc, req->r_osd);
1236 __move_osd_to_lru(osdc, req->r_osd);
1237 }
1238 if (list_empty(&req->r_osd_item)) 1278 if (list_empty(&req->r_osd_item))
1239 req->r_osd = NULL; 1279 req->r_osd = NULL;
1240 } 1280 }
1241 ceph_osdc_put_request(req); 1281 ceph_osdc_put_request(req);
1242} 1282}
1243 1283
1244void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
1245 struct ceph_osd_request *req)
1246{
1247 mutex_lock(&osdc->request_mutex);
1248 if (req->r_linger) {
1249 req->r_linger = 0;
1250 __unregister_linger_request(osdc, req);
1251 }
1252 mutex_unlock(&osdc->request_mutex);
1253}
1254EXPORT_SYMBOL(ceph_osdc_unregister_linger_request);
1255
1256void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, 1284void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
1257 struct ceph_osd_request *req) 1285 struct ceph_osd_request *req)
1258{ 1286{
@@ -2430,6 +2458,25 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
2430EXPORT_SYMBOL(ceph_osdc_start_request); 2458EXPORT_SYMBOL(ceph_osdc_start_request);
2431 2459
2432/* 2460/*
2461 * Unregister a registered request. The request is not completed (i.e.
2462 * no callbacks or wakeups) - higher layers are supposed to know what
2463 * they are canceling.
2464 */
2465void ceph_osdc_cancel_request(struct ceph_osd_request *req)
2466{
2467 struct ceph_osd_client *osdc = req->r_osdc;
2468
2469 mutex_lock(&osdc->request_mutex);
2470 if (req->r_linger)
2471 __unregister_linger_request(osdc, req);
2472 __unregister_request(osdc, req);
2473 mutex_unlock(&osdc->request_mutex);
2474
2475 dout("%s %p tid %llu canceled\n", __func__, req, req->r_tid);
2476}
2477EXPORT_SYMBOL(ceph_osdc_cancel_request);
2478
2479/*
2433 * wait for a request to complete 2480 * wait for a request to complete
2434 */ 2481 */
2435int ceph_osdc_wait_request(struct ceph_osd_client *osdc, 2482int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
@@ -2437,18 +2484,18 @@ int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
2437{ 2484{
2438 int rc; 2485 int rc;
2439 2486
2487 dout("%s %p tid %llu\n", __func__, req, req->r_tid);
2488
2440 rc = wait_for_completion_interruptible(&req->r_completion); 2489 rc = wait_for_completion_interruptible(&req->r_completion);
2441 if (rc < 0) { 2490 if (rc < 0) {
2442 mutex_lock(&osdc->request_mutex); 2491 dout("%s %p tid %llu interrupted\n", __func__, req, req->r_tid);
2443 __cancel_request(req); 2492 ceph_osdc_cancel_request(req);
2444 __unregister_request(osdc, req);
2445 mutex_unlock(&osdc->request_mutex);
2446 complete_request(req); 2493 complete_request(req);
2447 dout("wait_request tid %llu canceled/timed out\n", req->r_tid);
2448 return rc; 2494 return rc;
2449 } 2495 }
2450 2496
2451 dout("wait_request tid %llu result %d\n", req->r_tid, req->r_result); 2497 dout("%s %p tid %llu result %d\n", __func__, req, req->r_tid,
2498 req->r_result);
2452 return req->r_result; 2499 return req->r_result;
2453} 2500}
2454EXPORT_SYMBOL(ceph_osdc_wait_request); 2501EXPORT_SYMBOL(ceph_osdc_wait_request);