about | summary | refs | log | tree | commit | diff | stats
path: root/net
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2015-09-11 15:33:03 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2015-09-11 15:33:03 -0400
commit: e013f74b60bbd37ee8c3a55214eb351ea3101c15 (patch)
tree: 096b59f550dea6df9347edf97b872dc75a79f653 /net
parent: 01cab5549c3e9a0fe7248fc5ad0fd79361cc0d39 (diff)
parent: 438386853d4c0c48fe73bf05a7d61c70ca5a3bfb (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph update from Sage Weil:
 "There are a few fixes for snapshot behavior with CephFS and support
  for the new keepalive protocol from Zheng, a libceph fix that affects
  both RBD and CephFS, a few bug fixes and cleanups for RBD from Ilya,
  and several small fixes and cleanups from Jianpeng and others"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
  ceph: improve readahead for file holes
  ceph: get inode size for each append write
  libceph: check data_len in ->alloc_msg()
  libceph: use keepalive2 to verify the mon session is alive
  rbd: plug rbd_dev->header.object_prefix memory leak
  rbd: fix double free on rbd_dev->header_name
  libceph: set 'exists' flag for newly up osd
  ceph: cleanup use of ceph_msg_get
  ceph: no need to get parent inode in ceph_open
  ceph: remove the useless judgement
  ceph: remove redundant test of head->safe and silence static analysis warnings
  ceph: fix queuing inode to mdsdir's snaprealm
  libceph: rename con_work() to ceph_con_workfn()
  libceph: Avoid holding the zero page on ceph_msgr_slab_init errors
  libceph: remove the unused macro AES_KEY_SIZE
  ceph: invalidate dirty pages after forced umount
  ceph: EIO all operations after forced umount
Diffstat (limited to 'net')
-rw-r--r-- net/ceph/ceph_common.c | 1
-rw-r--r-- net/ceph/crypto.c | 4
-rw-r--r-- net/ceph/messenger.c | 82
-rw-r--r-- net/ceph/mon_client.c | 37
-rw-r--r-- net/ceph/osd_client.c | 51
-rw-r--r-- net/ceph/osdmap.c | 2
6 files changed, 111 insertions, 66 deletions
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 69a4d30a9ccf..54a00d66509e 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -357,6 +357,7 @@ ceph_parse_options(char *options, const char *dev_name,
357 opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; 357 opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
358 opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; 358 opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
359 opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; 359 opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
360 opt->monc_ping_timeout = CEPH_MONC_PING_TIMEOUT_DEFAULT;
360 361
361 /* get mon ip(s) */ 362 /* get mon ip(s) */
362 /* ip1[:port1][,ip2[:port2]...] */ 363 /* ip1[:port1][,ip2[:port2]...] */
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 790fe89d90c0..4440edcce0d6 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -79,10 +79,6 @@ int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *inkey)
79 return 0; 79 return 0;
80} 80}
81 81
82
83
84#define AES_KEY_SIZE 16
85
86static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void) 82static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void)
87{ 83{
88 return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); 84 return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index e3be1d22a247..525f454f7531 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -163,6 +163,7 @@ static struct kmem_cache *ceph_msg_data_cache;
163static char tag_msg = CEPH_MSGR_TAG_MSG; 163static char tag_msg = CEPH_MSGR_TAG_MSG;
164static char tag_ack = CEPH_MSGR_TAG_ACK; 164static char tag_ack = CEPH_MSGR_TAG_ACK;
165static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; 165static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE;
166static char tag_keepalive2 = CEPH_MSGR_TAG_KEEPALIVE2;
166 167
167#ifdef CONFIG_LOCKDEP 168#ifdef CONFIG_LOCKDEP
168static struct lock_class_key socket_class; 169static struct lock_class_key socket_class;
@@ -176,7 +177,7 @@ static struct lock_class_key socket_class;
176 177
177static void queue_con(struct ceph_connection *con); 178static void queue_con(struct ceph_connection *con);
178static void cancel_con(struct ceph_connection *con); 179static void cancel_con(struct ceph_connection *con);
179static void con_work(struct work_struct *); 180static void ceph_con_workfn(struct work_struct *);
180static void con_fault(struct ceph_connection *con); 181static void con_fault(struct ceph_connection *con);
181 182
182/* 183/*
@@ -276,22 +277,22 @@ static void _ceph_msgr_exit(void)
276 ceph_msgr_wq = NULL; 277 ceph_msgr_wq = NULL;
277 } 278 }
278 279
279 ceph_msgr_slab_exit();
280
281 BUG_ON(zero_page == NULL); 280 BUG_ON(zero_page == NULL);
282 page_cache_release(zero_page); 281 page_cache_release(zero_page);
283 zero_page = NULL; 282 zero_page = NULL;
283
284 ceph_msgr_slab_exit();
284} 285}
285 286
286int ceph_msgr_init(void) 287int ceph_msgr_init(void)
287{ 288{
289 if (ceph_msgr_slab_init())
290 return -ENOMEM;
291
288 BUG_ON(zero_page != NULL); 292 BUG_ON(zero_page != NULL);
289 zero_page = ZERO_PAGE(0); 293 zero_page = ZERO_PAGE(0);
290 page_cache_get(zero_page); 294 page_cache_get(zero_page);
291 295
292 if (ceph_msgr_slab_init())
293 return -ENOMEM;
294
295 /* 296 /*
296 * The number of active work items is limited by the number of 297 * The number of active work items is limited by the number of
297 * connections, so leave @max_active at default. 298 * connections, so leave @max_active at default.
@@ -749,7 +750,7 @@ void ceph_con_init(struct ceph_connection *con, void *private,
749 mutex_init(&con->mutex); 750 mutex_init(&con->mutex);
750 INIT_LIST_HEAD(&con->out_queue); 751 INIT_LIST_HEAD(&con->out_queue);
751 INIT_LIST_HEAD(&con->out_sent); 752 INIT_LIST_HEAD(&con->out_sent);
752 INIT_DELAYED_WORK(&con->work, con_work); 753 INIT_DELAYED_WORK(&con->work, ceph_con_workfn);
753 754
754 con->state = CON_STATE_CLOSED; 755 con->state = CON_STATE_CLOSED;
755} 756}
@@ -1351,7 +1352,15 @@ static void prepare_write_keepalive(struct ceph_connection *con)
1351{ 1352{
1352 dout("prepare_write_keepalive %p\n", con); 1353 dout("prepare_write_keepalive %p\n", con);
1353 con_out_kvec_reset(con); 1354 con_out_kvec_reset(con);
1354 con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive); 1355 if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) {
1356 struct timespec ts = CURRENT_TIME;
1357 struct ceph_timespec ceph_ts;
1358 ceph_encode_timespec(&ceph_ts, &ts);
1359 con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2);
1360 con_out_kvec_add(con, sizeof(ceph_ts), &ceph_ts);
1361 } else {
1362 con_out_kvec_add(con, sizeof(tag_keepalive), &tag_keepalive);
1363 }
1355 con_flag_set(con, CON_FLAG_WRITE_PENDING); 1364 con_flag_set(con, CON_FLAG_WRITE_PENDING);
1356} 1365}
1357 1366
@@ -1625,6 +1634,12 @@ static void prepare_read_tag(struct ceph_connection *con)
1625 con->in_tag = CEPH_MSGR_TAG_READY; 1634 con->in_tag = CEPH_MSGR_TAG_READY;
1626} 1635}
1627 1636
1637static void prepare_read_keepalive_ack(struct ceph_connection *con)
1638{
1639 dout("prepare_read_keepalive_ack %p\n", con);
1640 con->in_base_pos = 0;
1641}
1642
1628/* 1643/*
1629 * Prepare to read a message. 1644 * Prepare to read a message.
1630 */ 1645 */
@@ -2322,13 +2337,6 @@ static int read_partial_message(struct ceph_connection *con)
2322 return ret; 2337 return ret;
2323 2338
2324 BUG_ON(!con->in_msg ^ skip); 2339 BUG_ON(!con->in_msg ^ skip);
2325 if (con->in_msg && data_len > con->in_msg->data_length) {
2326 pr_warn("%s skipping long message (%u > %zd)\n",
2327 __func__, data_len, con->in_msg->data_length);
2328 ceph_msg_put(con->in_msg);
2329 con->in_msg = NULL;
2330 skip = 1;
2331 }
2332 if (skip) { 2340 if (skip) {
2333 /* skip this message */ 2341 /* skip this message */
2334 dout("alloc_msg said skip message\n"); 2342 dout("alloc_msg said skip message\n");
@@ -2457,6 +2465,17 @@ static void process_message(struct ceph_connection *con)
2457 mutex_lock(&con->mutex); 2465 mutex_lock(&con->mutex);
2458} 2466}
2459 2467
2468static int read_keepalive_ack(struct ceph_connection *con)
2469{
2470 struct ceph_timespec ceph_ts;
2471 size_t size = sizeof(ceph_ts);
2472 int ret = read_partial(con, size, size, &ceph_ts);
2473 if (ret <= 0)
2474 return ret;
2475 ceph_decode_timespec(&con->last_keepalive_ack, &ceph_ts);
2476 prepare_read_tag(con);
2477 return 1;
2478}
2460 2479
2461/* 2480/*
2462 * Write something to the socket. Called in a worker thread when the 2481 * Write something to the socket. Called in a worker thread when the
@@ -2526,6 +2545,10 @@ more_kvec:
2526 2545
2527do_next: 2546do_next:
2528 if (con->state == CON_STATE_OPEN) { 2547 if (con->state == CON_STATE_OPEN) {
2548 if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) {
2549 prepare_write_keepalive(con);
2550 goto more;
2551 }
2529 /* is anything else pending? */ 2552 /* is anything else pending? */
2530 if (!list_empty(&con->out_queue)) { 2553 if (!list_empty(&con->out_queue)) {
2531 prepare_write_message(con); 2554 prepare_write_message(con);
@@ -2535,10 +2558,6 @@ do_next:
2535 prepare_write_ack(con); 2558 prepare_write_ack(con);
2536 goto more; 2559 goto more;
2537 } 2560 }
2538 if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) {
2539 prepare_write_keepalive(con);
2540 goto more;
2541 }
2542 } 2561 }
2543 2562
2544 /* Nothing to do! */ 2563 /* Nothing to do! */
@@ -2641,6 +2660,9 @@ more:
2641 case CEPH_MSGR_TAG_ACK: 2660 case CEPH_MSGR_TAG_ACK:
2642 prepare_read_ack(con); 2661 prepare_read_ack(con);
2643 break; 2662 break;
2663 case CEPH_MSGR_TAG_KEEPALIVE2_ACK:
2664 prepare_read_keepalive_ack(con);
2665 break;
2644 case CEPH_MSGR_TAG_CLOSE: 2666 case CEPH_MSGR_TAG_CLOSE:
2645 con_close_socket(con); 2667 con_close_socket(con);
2646 con->state = CON_STATE_CLOSED; 2668 con->state = CON_STATE_CLOSED;
@@ -2684,6 +2706,12 @@ more:
2684 process_ack(con); 2706 process_ack(con);
2685 goto more; 2707 goto more;
2686 } 2708 }
2709 if (con->in_tag == CEPH_MSGR_TAG_KEEPALIVE2_ACK) {
2710 ret = read_keepalive_ack(con);
2711 if (ret <= 0)
2712 goto out;
2713 goto more;
2714 }
2687 2715
2688out: 2716out:
2689 dout("try_read done on %p ret %d\n", con, ret); 2717 dout("try_read done on %p ret %d\n", con, ret);
@@ -2799,7 +2827,7 @@ static void con_fault_finish(struct ceph_connection *con)
2799/* 2827/*
2800 * Do some work on a connection. Drop a connection ref when we're done. 2828 * Do some work on a connection. Drop a connection ref when we're done.
2801 */ 2829 */
2802static void con_work(struct work_struct *work) 2830static void ceph_con_workfn(struct work_struct *work)
2803{ 2831{
2804 struct ceph_connection *con = container_of(work, struct ceph_connection, 2832 struct ceph_connection *con = container_of(work, struct ceph_connection,
2805 work.work); 2833 work.work);
@@ -3101,6 +3129,20 @@ void ceph_con_keepalive(struct ceph_connection *con)
3101} 3129}
3102EXPORT_SYMBOL(ceph_con_keepalive); 3130EXPORT_SYMBOL(ceph_con_keepalive);
3103 3131
3132bool ceph_con_keepalive_expired(struct ceph_connection *con,
3133 unsigned long interval)
3134{
3135 if (interval > 0 &&
3136 (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2)) {
3137 struct timespec now = CURRENT_TIME;
3138 struct timespec ts;
3139 jiffies_to_timespec(interval, &ts);
3140 ts = timespec_add(con->last_keepalive_ack, ts);
3141 return timespec_compare(&now, &ts) >= 0;
3142 }
3143 return false;
3144}
3145
3104static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type) 3146static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type)
3105{ 3147{
3106 struct ceph_msg_data *data; 3148 struct ceph_msg_data *data;
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 9d6ff1215928..edda01626a45 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -149,6 +149,10 @@ static int __open_session(struct ceph_mon_client *monc)
149 CEPH_ENTITY_TYPE_MON, monc->cur_mon, 149 CEPH_ENTITY_TYPE_MON, monc->cur_mon,
150 &monc->monmap->mon_inst[monc->cur_mon].addr); 150 &monc->monmap->mon_inst[monc->cur_mon].addr);
151 151
152 /* send an initial keepalive to ensure our timestamp is
153 * valid by the time we are in an OPENED state */
154 ceph_con_keepalive(&monc->con);
155
152 /* initiatiate authentication handshake */ 156 /* initiatiate authentication handshake */
153 ret = ceph_auth_build_hello(monc->auth, 157 ret = ceph_auth_build_hello(monc->auth,
154 monc->m_auth->front.iov_base, 158 monc->m_auth->front.iov_base,
@@ -170,14 +174,19 @@ static bool __sub_expired(struct ceph_mon_client *monc)
170 */ 174 */
171static void __schedule_delayed(struct ceph_mon_client *monc) 175static void __schedule_delayed(struct ceph_mon_client *monc)
172{ 176{
173 unsigned int delay; 177 struct ceph_options *opt = monc->client->options;
178 unsigned long delay;
174 179
175 if (monc->cur_mon < 0 || __sub_expired(monc)) 180 if (monc->cur_mon < 0 || __sub_expired(monc)) {
176 delay = 10 * HZ; 181 delay = 10 * HZ;
177 else 182 } else {
178 delay = 20 * HZ; 183 delay = 20 * HZ;
179 dout("__schedule_delayed after %u\n", delay); 184 if (opt->monc_ping_timeout > 0)
180 schedule_delayed_work(&monc->delayed_work, delay); 185 delay = min(delay, opt->monc_ping_timeout / 3);
186 }
187 dout("__schedule_delayed after %lu\n", delay);
188 schedule_delayed_work(&monc->delayed_work,
189 round_jiffies_relative(delay));
181} 190}
182 191
183/* 192/*
@@ -743,11 +752,23 @@ static void delayed_work(struct work_struct *work)
743 __close_session(monc); 752 __close_session(monc);
744 __open_session(monc); /* continue hunting */ 753 __open_session(monc); /* continue hunting */
745 } else { 754 } else {
746 ceph_con_keepalive(&monc->con); 755 struct ceph_options *opt = monc->client->options;
756 int is_auth = ceph_auth_is_authenticated(monc->auth);
757 if (ceph_con_keepalive_expired(&monc->con,
758 opt->monc_ping_timeout)) {
759 dout("monc keepalive timeout\n");
760 is_auth = 0;
761 __close_session(monc);
762 monc->hunting = true;
763 __open_session(monc);
764 }
747 765
748 __validate_auth(monc); 766 if (!monc->hunting) {
767 ceph_con_keepalive(&monc->con);
768 __validate_auth(monc);
769 }
749 770
750 if (ceph_auth_is_authenticated(monc->auth)) 771 if (is_auth)
751 __send_subscribe(monc); 772 __send_subscribe(monc);
752 } 773 }
753 __schedule_delayed(monc); 774 __schedule_delayed(monc);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 50033677c0fa..80b94e37c94a 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -2817,8 +2817,9 @@ out:
2817} 2817}
2818 2818
2819/* 2819/*
2820 * lookup and return message for incoming reply. set up reply message 2820 * Lookup and return message for incoming reply. Don't try to do
2821 * pages. 2821 * anything about a larger than preallocated data portion of the
2822 * message at the moment - for now, just skip the message.
2822 */ 2823 */
2823static struct ceph_msg *get_reply(struct ceph_connection *con, 2824static struct ceph_msg *get_reply(struct ceph_connection *con,
2824 struct ceph_msg_header *hdr, 2825 struct ceph_msg_header *hdr,
@@ -2836,10 +2837,10 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
2836 mutex_lock(&osdc->request_mutex); 2837 mutex_lock(&osdc->request_mutex);
2837 req = __lookup_request(osdc, tid); 2838 req = __lookup_request(osdc, tid);
2838 if (!req) { 2839 if (!req) {
2839 *skip = 1; 2840 pr_warn("%s osd%d tid %llu unknown, skipping\n",
2841 __func__, osd->o_osd, tid);
2840 m = NULL; 2842 m = NULL;
2841 dout("get_reply unknown tid %llu from osd%d\n", tid, 2843 *skip = 1;
2842 osd->o_osd);
2843 goto out; 2844 goto out;
2844 } 2845 }
2845 2846
@@ -2849,10 +2850,9 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
2849 ceph_msg_revoke_incoming(req->r_reply); 2850 ceph_msg_revoke_incoming(req->r_reply);
2850 2851
2851 if (front_len > req->r_reply->front_alloc_len) { 2852 if (front_len > req->r_reply->front_alloc_len) {
2852 pr_warn("get_reply front %d > preallocated %d (%u#%llu)\n", 2853 pr_warn("%s osd%d tid %llu front %d > preallocated %d\n",
2853 front_len, req->r_reply->front_alloc_len, 2854 __func__, osd->o_osd, req->r_tid, front_len,
2854 (unsigned int)con->peer_name.type, 2855 req->r_reply->front_alloc_len);
2855 le64_to_cpu(con->peer_name.num));
2856 m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front_len, GFP_NOFS, 2856 m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front_len, GFP_NOFS,
2857 false); 2857 false);
2858 if (!m) 2858 if (!m)
@@ -2860,37 +2860,22 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
2860 ceph_msg_put(req->r_reply); 2860 ceph_msg_put(req->r_reply);
2861 req->r_reply = m; 2861 req->r_reply = m;
2862 } 2862 }
2863 m = ceph_msg_get(req->r_reply);
2864
2865 if (data_len > 0) {
2866 struct ceph_osd_data *osd_data;
2867 2863
2868 /* 2864 if (data_len > req->r_reply->data_length) {
2869 * XXX This is assuming there is only one op containing 2865 pr_warn("%s osd%d tid %llu data %d > preallocated %zu, skipping\n",
2870 * XXX page data. Probably OK for reads, but this 2866 __func__, osd->o_osd, req->r_tid, data_len,
2871 * XXX ought to be done more generally. 2867 req->r_reply->data_length);
2872 */ 2868 m = NULL;
2873 osd_data = osd_req_op_extent_osd_data(req, 0); 2869 *skip = 1;
2874 if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) { 2870 goto out;
2875 if (osd_data->pages &&
2876 unlikely(osd_data->length < data_len)) {
2877
2878 pr_warn("tid %lld reply has %d bytes we had only %llu bytes ready\n",
2879 tid, data_len, osd_data->length);
2880 *skip = 1;
2881 ceph_msg_put(m);
2882 m = NULL;
2883 goto out;
2884 }
2885 }
2886 } 2871 }
2887 *skip = 0; 2872
2873 m = ceph_msg_get(req->r_reply);
2888 dout("get_reply tid %lld %p\n", tid, m); 2874 dout("get_reply tid %lld %p\n", tid, m);
2889 2875
2890out: 2876out:
2891 mutex_unlock(&osdc->request_mutex); 2877 mutex_unlock(&osdc->request_mutex);
2892 return m; 2878 return m;
2893
2894} 2879}
2895 2880
2896static struct ceph_msg *alloc_msg(struct ceph_connection *con, 2881static struct ceph_msg *alloc_msg(struct ceph_connection *con,
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 4a3125836b64..7d8f581d9f1f 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1300,7 +1300,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
1300 ceph_decode_addr(&addr); 1300 ceph_decode_addr(&addr);
1301 pr_info("osd%d up\n", osd); 1301 pr_info("osd%d up\n", osd);
1302 BUG_ON(osd >= map->max_osd); 1302 BUG_ON(osd >= map->max_osd);
1303 map->osd_state[osd] |= CEPH_OSD_UP; 1303 map->osd_state[osd] |= CEPH_OSD_UP | CEPH_OSD_EXISTS;
1304 map->osd_addr[osd] = addr; 1304 map->osd_addr[osd] = addr;
1305 } 1305 }
1306 1306