aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/ceph/libceph.h2
-rw-r--r--include/linux/ceph/messenger.h4
-rw-r--r--include/linux/ceph/msgr.h4
-rw-r--r--net/ceph/ceph_common.c1
-rw-r--r--net/ceph/messenger.c59
-rw-r--r--net/ceph/mon_client.c37
6 files changed, 93 insertions, 14 deletions
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 9ebee53d3bf5..397c5cd09794 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -46,6 +46,7 @@ struct ceph_options {
46 unsigned long mount_timeout; /* jiffies */ 46 unsigned long mount_timeout; /* jiffies */
47 unsigned long osd_idle_ttl; /* jiffies */ 47 unsigned long osd_idle_ttl; /* jiffies */
48 unsigned long osd_keepalive_timeout; /* jiffies */ 48 unsigned long osd_keepalive_timeout; /* jiffies */
49 unsigned long monc_ping_timeout; /* jiffies */
49 50
50 /* 51 /*
51 * any type that can't be simply compared or doesn't need need 52 * any type that can't be simply compared or doesn't need need
@@ -66,6 +67,7 @@ struct ceph_options {
66#define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) 67#define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000)
67#define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) 68#define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000)
68#define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) 69#define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000)
70#define CEPH_MONC_PING_TIMEOUT_DEFAULT msecs_to_jiffies(30 * 1000)
69 71
70#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) 72#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024)
71#define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) 73#define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024)
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 37753278987a..7e1252e97a30 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -248,6 +248,8 @@ struct ceph_connection {
248 int in_base_pos; /* bytes read */ 248 int in_base_pos; /* bytes read */
249 __le64 in_temp_ack; /* for reading an ack */ 249 __le64 in_temp_ack; /* for reading an ack */
250 250
251 struct timespec last_keepalive_ack;
252
251 struct delayed_work work; /* send|recv work */ 253 struct delayed_work work; /* send|recv work */
252 unsigned long delay; /* current delay interval */ 254 unsigned long delay; /* current delay interval */
253}; 255};
@@ -285,6 +287,8 @@ extern void ceph_msg_revoke(struct ceph_msg *msg);
285extern void ceph_msg_revoke_incoming(struct ceph_msg *msg); 287extern void ceph_msg_revoke_incoming(struct ceph_msg *msg);
286 288
287extern void ceph_con_keepalive(struct ceph_connection *con); 289extern void ceph_con_keepalive(struct ceph_connection *con);
290extern bool ceph_con_keepalive_expired(struct ceph_connection *con,
291 unsigned long interval);
288 292
289extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, 293extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
290 size_t length, size_t alignment); 294 size_t length, size_t alignment);
diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h
index 1c1887206ffa..0fe2656ac415 100644
--- a/include/linux/ceph/msgr.h
+++ b/include/linux/ceph/msgr.h
@@ -84,10 +84,12 @@ struct ceph_entity_inst {
84#define CEPH_MSGR_TAG_MSG 7 /* message */ 84#define CEPH_MSGR_TAG_MSG 7 /* message */
85#define CEPH_MSGR_TAG_ACK 8 /* message ack */ 85#define CEPH_MSGR_TAG_ACK 8 /* message ack */
86#define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! */ 86#define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! */
87#define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */ 87#define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */
88#define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */ 88#define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */
89#define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */ 89#define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */
90#define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */ 90#define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */
91#define CEPH_MSGR_TAG_KEEPALIVE2 14 /* keepalive2 byte + ceph_timespec */
92#define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */
91 93
92 94
93/* 95/*
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index f30329f72641..3f56eefc2a07 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -357,6 +357,7 @@ ceph_parse_options(char *options, const char *dev_name,
357 opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; 357 opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
358 opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; 358 opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
359 opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; 359 opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
360 opt->monc_ping_timeout = CEPH_MONC_PING_TIMEOUT_DEFAULT;
360 361
361 /* get mon ip(s) */ 362 /* get mon ip(s) */
362 /* ip1[:port1][,ip2[:port2]...] */ 363 /* ip1[:port1][,ip2[:port2]...] */
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 101ab6285fba..36757d46ac40 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -163,6 +163,7 @@ static struct kmem_cache *ceph_msg_data_cache;
163static char tag_msg = CEPH_MSGR_TAG_MSG; 163static char tag_msg = CEPH_MSGR_TAG_MSG;
164static char tag_ack = CEPH_MSGR_TAG_ACK; 164static char tag_ack = CEPH_MSGR_TAG_ACK;
165static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; 165static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE;
166static char tag_keepalive2 = CEPH_MSGR_TAG_KEEPALIVE2;
166 167
167#ifdef CONFIG_LOCKDEP 168#ifdef CONFIG_LOCKDEP
168static struct lock_class_key socket_class; 169static struct lock_class_key socket_class;
@@ -1351,7 +1352,15 @@ static void prepare_write_keepalive(struct ceph_connection *con)
1351{ 1352{
1352 dout("prepare_write_keepalive %p\n", con); 1353 dout("prepare_write_keepalive %p\n", con);
1353 con_out_kvec_reset(con); 1354 con_out_kvec_reset(con);
1354 con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive); 1355 if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) {
1356 struct timespec ts = CURRENT_TIME;
1357 struct ceph_timespec ceph_ts;
1358 ceph_encode_timespec(&ceph_ts, &ts);
1359 con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2);
1360 con_out_kvec_add(con, sizeof(ceph_ts), &ceph_ts);
1361 } else {
1362 con_out_kvec_add(con, sizeof(tag_keepalive), &tag_keepalive);
1363 }
1355 con_flag_set(con, CON_FLAG_WRITE_PENDING); 1364 con_flag_set(con, CON_FLAG_WRITE_PENDING);
1356} 1365}
1357 1366
@@ -1625,6 +1634,12 @@ static void prepare_read_tag(struct ceph_connection *con)
1625 con->in_tag = CEPH_MSGR_TAG_READY; 1634 con->in_tag = CEPH_MSGR_TAG_READY;
1626} 1635}
1627 1636
1637static void prepare_read_keepalive_ack(struct ceph_connection *con)
1638{
1639 dout("prepare_read_keepalive_ack %p\n", con);
1640 con->in_base_pos = 0;
1641}
1642
1628/* 1643/*
1629 * Prepare to read a message. 1644 * Prepare to read a message.
1630 */ 1645 */
@@ -2457,6 +2472,17 @@ static void process_message(struct ceph_connection *con)
2457 mutex_lock(&con->mutex); 2472 mutex_lock(&con->mutex);
2458} 2473}
2459 2474
2475static int read_keepalive_ack(struct ceph_connection *con)
2476{
2477 struct ceph_timespec ceph_ts;
2478 size_t size = sizeof(ceph_ts);
2479 int ret = read_partial(con, size, size, &ceph_ts);
2480 if (ret <= 0)
2481 return ret;
2482 ceph_decode_timespec(&con->last_keepalive_ack, &ceph_ts);
2483 prepare_read_tag(con);
2484 return 1;
2485}
2460 2486
2461/* 2487/*
2462 * Write something to the socket. Called in a worker thread when the 2488 * Write something to the socket. Called in a worker thread when the
@@ -2526,6 +2552,10 @@ more_kvec:
2526 2552
2527do_next: 2553do_next:
2528 if (con->state == CON_STATE_OPEN) { 2554 if (con->state == CON_STATE_OPEN) {
2555 if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) {
2556 prepare_write_keepalive(con);
2557 goto more;
2558 }
2529 /* is anything else pending? */ 2559 /* is anything else pending? */
2530 if (!list_empty(&con->out_queue)) { 2560 if (!list_empty(&con->out_queue)) {
2531 prepare_write_message(con); 2561 prepare_write_message(con);
@@ -2535,10 +2565,6 @@ do_next:
2535 prepare_write_ack(con); 2565 prepare_write_ack(con);
2536 goto more; 2566 goto more;
2537 } 2567 }
2538 if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) {
2539 prepare_write_keepalive(con);
2540 goto more;
2541 }
2542 } 2568 }
2543 2569
2544 /* Nothing to do! */ 2570 /* Nothing to do! */
@@ -2641,6 +2667,9 @@ more:
2641 case CEPH_MSGR_TAG_ACK: 2667 case CEPH_MSGR_TAG_ACK:
2642 prepare_read_ack(con); 2668 prepare_read_ack(con);
2643 break; 2669 break;
2670 case CEPH_MSGR_TAG_KEEPALIVE2_ACK:
2671 prepare_read_keepalive_ack(con);
2672 break;
2644 case CEPH_MSGR_TAG_CLOSE: 2673 case CEPH_MSGR_TAG_CLOSE:
2645 con_close_socket(con); 2674 con_close_socket(con);
2646 con->state = CON_STATE_CLOSED; 2675 con->state = CON_STATE_CLOSED;
@@ -2684,6 +2713,12 @@ more:
2684 process_ack(con); 2713 process_ack(con);
2685 goto more; 2714 goto more;
2686 } 2715 }
2716 if (con->in_tag == CEPH_MSGR_TAG_KEEPALIVE2_ACK) {
2717 ret = read_keepalive_ack(con);
2718 if (ret <= 0)
2719 goto out;
2720 goto more;
2721 }
2687 2722
2688out: 2723out:
2689 dout("try_read done on %p ret %d\n", con, ret); 2724 dout("try_read done on %p ret %d\n", con, ret);
@@ -3101,6 +3136,20 @@ void ceph_con_keepalive(struct ceph_connection *con)
3101} 3136}
3102EXPORT_SYMBOL(ceph_con_keepalive); 3137EXPORT_SYMBOL(ceph_con_keepalive);
3103 3138
3139bool ceph_con_keepalive_expired(struct ceph_connection *con,
3140 unsigned long interval)
3141{
3142 if (interval > 0 &&
3143 (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2)) {
3144 struct timespec now = CURRENT_TIME;
3145 struct timespec ts;
3146 jiffies_to_timespec(interval, &ts);
3147 ts = timespec_add(con->last_keepalive_ack, ts);
3148 return timespec_compare(&now, &ts) >= 0;
3149 }
3150 return false;
3151}
3152
3104static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type) 3153static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type)
3105{ 3154{
3106 struct ceph_msg_data *data; 3155 struct ceph_msg_data *data;
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 9d6ff1215928..edda01626a45 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -149,6 +149,10 @@ static int __open_session(struct ceph_mon_client *monc)
149 CEPH_ENTITY_TYPE_MON, monc->cur_mon, 149 CEPH_ENTITY_TYPE_MON, monc->cur_mon,
150 &monc->monmap->mon_inst[monc->cur_mon].addr); 150 &monc->monmap->mon_inst[monc->cur_mon].addr);
151 151
152 /* send an initial keepalive to ensure our timestamp is
153 * valid by the time we are in an OPENED state */
154 ceph_con_keepalive(&monc->con);
155
152 /* initiatiate authentication handshake */ 156 /* initiatiate authentication handshake */
153 ret = ceph_auth_build_hello(monc->auth, 157 ret = ceph_auth_build_hello(monc->auth,
154 monc->m_auth->front.iov_base, 158 monc->m_auth->front.iov_base,
@@ -170,14 +174,19 @@ static bool __sub_expired(struct ceph_mon_client *monc)
170 */ 174 */
171static void __schedule_delayed(struct ceph_mon_client *monc) 175static void __schedule_delayed(struct ceph_mon_client *monc)
172{ 176{
173 unsigned int delay; 177 struct ceph_options *opt = monc->client->options;
178 unsigned long delay;
174 179
175 if (monc->cur_mon < 0 || __sub_expired(monc)) 180 if (monc->cur_mon < 0 || __sub_expired(monc)) {
176 delay = 10 * HZ; 181 delay = 10 * HZ;
177 else 182 } else {
178 delay = 20 * HZ; 183 delay = 20 * HZ;
179 dout("__schedule_delayed after %u\n", delay); 184 if (opt->monc_ping_timeout > 0)
180 schedule_delayed_work(&monc->delayed_work, delay); 185 delay = min(delay, opt->monc_ping_timeout / 3);
186 }
187 dout("__schedule_delayed after %lu\n", delay);
188 schedule_delayed_work(&monc->delayed_work,
189 round_jiffies_relative(delay));
181} 190}
182 191
183/* 192/*
@@ -743,11 +752,23 @@ static void delayed_work(struct work_struct *work)
743 __close_session(monc); 752 __close_session(monc);
744 __open_session(monc); /* continue hunting */ 753 __open_session(monc); /* continue hunting */
745 } else { 754 } else {
746 ceph_con_keepalive(&monc->con); 755 struct ceph_options *opt = monc->client->options;
756 int is_auth = ceph_auth_is_authenticated(monc->auth);
757 if (ceph_con_keepalive_expired(&monc->con,
758 opt->monc_ping_timeout)) {
759 dout("monc keepalive timeout\n");
760 is_auth = 0;
761 __close_session(monc);
762 monc->hunting = true;
763 __open_session(monc);
764 }
747 765
748 __validate_auth(monc); 766 if (!monc->hunting) {
767 ceph_con_keepalive(&monc->con);
768 __validate_auth(monc);
769 }
749 770
750 if (ceph_auth_is_authenticated(monc->auth)) 771 if (is_auth)
751 __send_subscribe(monc); 772 __send_subscribe(monc);
752 } 773 }
753 __schedule_delayed(monc); 774 __schedule_delayed(monc);