diff options
-rw-r--r-- | include/linux/ceph/libceph.h | 2 | ||||
-rw-r--r-- | include/linux/ceph/messenger.h | 4 | ||||
-rw-r--r-- | include/linux/ceph/msgr.h | 4 | ||||
-rw-r--r-- | net/ceph/ceph_common.c | 1 | ||||
-rw-r--r-- | net/ceph/messenger.c | 59 | ||||
-rw-r--r-- | net/ceph/mon_client.c | 37 |
6 files changed, 93 insertions, 14 deletions
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 9ebee53d3bf5..397c5cd09794 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h | |||
@@ -46,6 +46,7 @@ struct ceph_options { | |||
46 | unsigned long mount_timeout; /* jiffies */ | 46 | unsigned long mount_timeout; /* jiffies */ |
47 | unsigned long osd_idle_ttl; /* jiffies */ | 47 | unsigned long osd_idle_ttl; /* jiffies */ |
48 | unsigned long osd_keepalive_timeout; /* jiffies */ | 48 | unsigned long osd_keepalive_timeout; /* jiffies */ |
49 | unsigned long monc_ping_timeout; /* jiffies */ | ||
49 | 50 | ||
50 | /* | 51 | /* |
51 | * any type that can't be simply compared or doesn't need need | 52 | * any type that can't be simply compared or doesn't need need |
@@ -66,6 +67,7 @@ struct ceph_options { | |||
66 | #define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) | 67 | #define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) |
67 | #define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) | 68 | #define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) |
68 | #define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) | 69 | #define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) |
70 | #define CEPH_MONC_PING_TIMEOUT_DEFAULT msecs_to_jiffies(30 * 1000) | ||
69 | 71 | ||
70 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) | 72 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) |
71 | #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) | 73 | #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) |
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 37753278987a..7e1252e97a30 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h | |||
@@ -248,6 +248,8 @@ struct ceph_connection { | |||
248 | int in_base_pos; /* bytes read */ | 248 | int in_base_pos; /* bytes read */ |
249 | __le64 in_temp_ack; /* for reading an ack */ | 249 | __le64 in_temp_ack; /* for reading an ack */ |
250 | 250 | ||
251 | struct timespec last_keepalive_ack; | ||
252 | |||
251 | struct delayed_work work; /* send|recv work */ | 253 | struct delayed_work work; /* send|recv work */ |
252 | unsigned long delay; /* current delay interval */ | 254 | unsigned long delay; /* current delay interval */ |
253 | }; | 255 | }; |
@@ -285,6 +287,8 @@ extern void ceph_msg_revoke(struct ceph_msg *msg); | |||
285 | extern void ceph_msg_revoke_incoming(struct ceph_msg *msg); | 287 | extern void ceph_msg_revoke_incoming(struct ceph_msg *msg); |
286 | 288 | ||
287 | extern void ceph_con_keepalive(struct ceph_connection *con); | 289 | extern void ceph_con_keepalive(struct ceph_connection *con); |
290 | extern bool ceph_con_keepalive_expired(struct ceph_connection *con, | ||
291 | unsigned long interval); | ||
288 | 292 | ||
289 | extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, | 293 | extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, |
290 | size_t length, size_t alignment); | 294 | size_t length, size_t alignment); |
diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h index 1c1887206ffa..0fe2656ac415 100644 --- a/include/linux/ceph/msgr.h +++ b/include/linux/ceph/msgr.h | |||
@@ -84,10 +84,12 @@ struct ceph_entity_inst { | |||
84 | #define CEPH_MSGR_TAG_MSG 7 /* message */ | 84 | #define CEPH_MSGR_TAG_MSG 7 /* message */ |
85 | #define CEPH_MSGR_TAG_ACK 8 /* message ack */ | 85 | #define CEPH_MSGR_TAG_ACK 8 /* message ack */ |
86 | #define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! */ | 86 | #define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! */ |
87 | #define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */ | 87 | #define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */ |
88 | #define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */ | 88 | #define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */ |
89 | #define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */ | 89 | #define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */ |
90 | #define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */ | 90 | #define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */ |
91 | #define CEPH_MSGR_TAG_KEEPALIVE2 14 /* keepalive2 byte + ceph_timespec */ | ||
92 | #define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */ | ||
91 | 93 | ||
92 | 94 | ||
93 | /* | 95 | /* |
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index f30329f72641..3f56eefc2a07 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c | |||
@@ -357,6 +357,7 @@ ceph_parse_options(char *options, const char *dev_name, | |||
357 | opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; | 357 | opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; |
358 | opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; | 358 | opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; |
359 | opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; | 359 | opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; |
360 | opt->monc_ping_timeout = CEPH_MONC_PING_TIMEOUT_DEFAULT; | ||
360 | 361 | ||
361 | /* get mon ip(s) */ | 362 | /* get mon ip(s) */ |
362 | /* ip1[:port1][,ip2[:port2]...] */ | 363 | /* ip1[:port1][,ip2[:port2]...] */ |
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 101ab6285fba..36757d46ac40 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c | |||
@@ -163,6 +163,7 @@ static struct kmem_cache *ceph_msg_data_cache; | |||
163 | static char tag_msg = CEPH_MSGR_TAG_MSG; | 163 | static char tag_msg = CEPH_MSGR_TAG_MSG; |
164 | static char tag_ack = CEPH_MSGR_TAG_ACK; | 164 | static char tag_ack = CEPH_MSGR_TAG_ACK; |
165 | static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; | 165 | static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; |
166 | static char tag_keepalive2 = CEPH_MSGR_TAG_KEEPALIVE2; | ||
166 | 167 | ||
167 | #ifdef CONFIG_LOCKDEP | 168 | #ifdef CONFIG_LOCKDEP |
168 | static struct lock_class_key socket_class; | 169 | static struct lock_class_key socket_class; |
@@ -1351,7 +1352,15 @@ static void prepare_write_keepalive(struct ceph_connection *con) | |||
1351 | { | 1352 | { |
1352 | dout("prepare_write_keepalive %p\n", con); | 1353 | dout("prepare_write_keepalive %p\n", con); |
1353 | con_out_kvec_reset(con); | 1354 | con_out_kvec_reset(con); |
1354 | con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive); | 1355 | if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) { |
1356 | struct timespec ts = CURRENT_TIME; | ||
1357 | struct ceph_timespec ceph_ts; | ||
1358 | ceph_encode_timespec(&ceph_ts, &ts); | ||
1359 | con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2); | ||
1360 | con_out_kvec_add(con, sizeof(ceph_ts), &ceph_ts); | ||
1361 | } else { | ||
1362 | con_out_kvec_add(con, sizeof(tag_keepalive), &tag_keepalive); | ||
1363 | } | ||
1355 | con_flag_set(con, CON_FLAG_WRITE_PENDING); | 1364 | con_flag_set(con, CON_FLAG_WRITE_PENDING); |
1356 | } | 1365 | } |
1357 | 1366 | ||
@@ -1625,6 +1634,12 @@ static void prepare_read_tag(struct ceph_connection *con) | |||
1625 | con->in_tag = CEPH_MSGR_TAG_READY; | 1634 | con->in_tag = CEPH_MSGR_TAG_READY; |
1626 | } | 1635 | } |
1627 | 1636 | ||
1637 | static void prepare_read_keepalive_ack(struct ceph_connection *con) | ||
1638 | { | ||
1639 | dout("prepare_read_keepalive_ack %p\n", con); | ||
1640 | con->in_base_pos = 0; | ||
1641 | } | ||
1642 | |||
1628 | /* | 1643 | /* |
1629 | * Prepare to read a message. | 1644 | * Prepare to read a message. |
1630 | */ | 1645 | */ |
@@ -2457,6 +2472,17 @@ static void process_message(struct ceph_connection *con) | |||
2457 | mutex_lock(&con->mutex); | 2472 | mutex_lock(&con->mutex); |
2458 | } | 2473 | } |
2459 | 2474 | ||
2475 | static int read_keepalive_ack(struct ceph_connection *con) | ||
2476 | { | ||
2477 | struct ceph_timespec ceph_ts; | ||
2478 | size_t size = sizeof(ceph_ts); | ||
2479 | int ret = read_partial(con, size, size, &ceph_ts); | ||
2480 | if (ret <= 0) | ||
2481 | return ret; | ||
2482 | ceph_decode_timespec(&con->last_keepalive_ack, &ceph_ts); | ||
2483 | prepare_read_tag(con); | ||
2484 | return 1; | ||
2485 | } | ||
2460 | 2486 | ||
2461 | /* | 2487 | /* |
2462 | * Write something to the socket. Called in a worker thread when the | 2488 | * Write something to the socket. Called in a worker thread when the |
@@ -2526,6 +2552,10 @@ more_kvec: | |||
2526 | 2552 | ||
2527 | do_next: | 2553 | do_next: |
2528 | if (con->state == CON_STATE_OPEN) { | 2554 | if (con->state == CON_STATE_OPEN) { |
2555 | if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) { | ||
2556 | prepare_write_keepalive(con); | ||
2557 | goto more; | ||
2558 | } | ||
2529 | /* is anything else pending? */ | 2559 | /* is anything else pending? */ |
2530 | if (!list_empty(&con->out_queue)) { | 2560 | if (!list_empty(&con->out_queue)) { |
2531 | prepare_write_message(con); | 2561 | prepare_write_message(con); |
@@ -2535,10 +2565,6 @@ do_next: | |||
2535 | prepare_write_ack(con); | 2565 | prepare_write_ack(con); |
2536 | goto more; | 2566 | goto more; |
2537 | } | 2567 | } |
2538 | if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) { | ||
2539 | prepare_write_keepalive(con); | ||
2540 | goto more; | ||
2541 | } | ||
2542 | } | 2568 | } |
2543 | 2569 | ||
2544 | /* Nothing to do! */ | 2570 | /* Nothing to do! */ |
@@ -2641,6 +2667,9 @@ more: | |||
2641 | case CEPH_MSGR_TAG_ACK: | 2667 | case CEPH_MSGR_TAG_ACK: |
2642 | prepare_read_ack(con); | 2668 | prepare_read_ack(con); |
2643 | break; | 2669 | break; |
2670 | case CEPH_MSGR_TAG_KEEPALIVE2_ACK: | ||
2671 | prepare_read_keepalive_ack(con); | ||
2672 | break; | ||
2644 | case CEPH_MSGR_TAG_CLOSE: | 2673 | case CEPH_MSGR_TAG_CLOSE: |
2645 | con_close_socket(con); | 2674 | con_close_socket(con); |
2646 | con->state = CON_STATE_CLOSED; | 2675 | con->state = CON_STATE_CLOSED; |
@@ -2684,6 +2713,12 @@ more: | |||
2684 | process_ack(con); | 2713 | process_ack(con); |
2685 | goto more; | 2714 | goto more; |
2686 | } | 2715 | } |
2716 | if (con->in_tag == CEPH_MSGR_TAG_KEEPALIVE2_ACK) { | ||
2717 | ret = read_keepalive_ack(con); | ||
2718 | if (ret <= 0) | ||
2719 | goto out; | ||
2720 | goto more; | ||
2721 | } | ||
2687 | 2722 | ||
2688 | out: | 2723 | out: |
2689 | dout("try_read done on %p ret %d\n", con, ret); | 2724 | dout("try_read done on %p ret %d\n", con, ret); |
@@ -3101,6 +3136,20 @@ void ceph_con_keepalive(struct ceph_connection *con) | |||
3101 | } | 3136 | } |
3102 | EXPORT_SYMBOL(ceph_con_keepalive); | 3137 | EXPORT_SYMBOL(ceph_con_keepalive); |
3103 | 3138 | ||
3139 | bool ceph_con_keepalive_expired(struct ceph_connection *con, | ||
3140 | unsigned long interval) | ||
3141 | { | ||
3142 | if (interval > 0 && | ||
3143 | (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2)) { | ||
3144 | struct timespec now = CURRENT_TIME; | ||
3145 | struct timespec ts; | ||
3146 | jiffies_to_timespec(interval, &ts); | ||
3147 | ts = timespec_add(con->last_keepalive_ack, ts); | ||
3148 | return timespec_compare(&now, &ts) >= 0; | ||
3149 | } | ||
3150 | return false; | ||
3151 | } | ||
3152 | |||
3104 | static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type) | 3153 | static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type) |
3105 | { | 3154 | { |
3106 | struct ceph_msg_data *data; | 3155 | struct ceph_msg_data *data; |
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 9d6ff1215928..edda01626a45 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c | |||
@@ -149,6 +149,10 @@ static int __open_session(struct ceph_mon_client *monc) | |||
149 | CEPH_ENTITY_TYPE_MON, monc->cur_mon, | 149 | CEPH_ENTITY_TYPE_MON, monc->cur_mon, |
150 | &monc->monmap->mon_inst[monc->cur_mon].addr); | 150 | &monc->monmap->mon_inst[monc->cur_mon].addr); |
151 | 151 | ||
152 | /* send an initial keepalive to ensure our timestamp is | ||
153 | * valid by the time we are in an OPENED state */ | ||
154 | ceph_con_keepalive(&monc->con); | ||
155 | |||
152 | /* initiatiate authentication handshake */ | 156 | /* initiatiate authentication handshake */ |
153 | ret = ceph_auth_build_hello(monc->auth, | 157 | ret = ceph_auth_build_hello(monc->auth, |
154 | monc->m_auth->front.iov_base, | 158 | monc->m_auth->front.iov_base, |
@@ -170,14 +174,19 @@ static bool __sub_expired(struct ceph_mon_client *monc) | |||
170 | */ | 174 | */ |
171 | static void __schedule_delayed(struct ceph_mon_client *monc) | 175 | static void __schedule_delayed(struct ceph_mon_client *monc) |
172 | { | 176 | { |
173 | unsigned int delay; | 177 | struct ceph_options *opt = monc->client->options; |
178 | unsigned long delay; | ||
174 | 179 | ||
175 | if (monc->cur_mon < 0 || __sub_expired(monc)) | 180 | if (monc->cur_mon < 0 || __sub_expired(monc)) { |
176 | delay = 10 * HZ; | 181 | delay = 10 * HZ; |
177 | else | 182 | } else { |
178 | delay = 20 * HZ; | 183 | delay = 20 * HZ; |
179 | dout("__schedule_delayed after %u\n", delay); | 184 | if (opt->monc_ping_timeout > 0) |
180 | schedule_delayed_work(&monc->delayed_work, delay); | 185 | delay = min(delay, opt->monc_ping_timeout / 3); |
186 | } | ||
187 | dout("__schedule_delayed after %lu\n", delay); | ||
188 | schedule_delayed_work(&monc->delayed_work, | ||
189 | round_jiffies_relative(delay)); | ||
181 | } | 190 | } |
182 | 191 | ||
183 | /* | 192 | /* |
@@ -743,11 +752,23 @@ static void delayed_work(struct work_struct *work) | |||
743 | __close_session(monc); | 752 | __close_session(monc); |
744 | __open_session(monc); /* continue hunting */ | 753 | __open_session(monc); /* continue hunting */ |
745 | } else { | 754 | } else { |
746 | ceph_con_keepalive(&monc->con); | 755 | struct ceph_options *opt = monc->client->options; |
756 | int is_auth = ceph_auth_is_authenticated(monc->auth); | ||
757 | if (ceph_con_keepalive_expired(&monc->con, | ||
758 | opt->monc_ping_timeout)) { | ||
759 | dout("monc keepalive timeout\n"); | ||
760 | is_auth = 0; | ||
761 | __close_session(monc); | ||
762 | monc->hunting = true; | ||
763 | __open_session(monc); | ||
764 | } | ||
747 | 765 | ||
748 | __validate_auth(monc); | 766 | if (!monc->hunting) { |
767 | ceph_con_keepalive(&monc->con); | ||
768 | __validate_auth(monc); | ||
769 | } | ||
749 | 770 | ||
750 | if (ceph_auth_is_authenticated(monc->auth)) | 771 | if (is_auth) |
751 | __send_subscribe(monc); | 772 | __send_subscribe(monc); |
752 | } | 773 | } |
753 | __schedule_delayed(monc); | 774 | __schedule_delayed(monc); |