diff options
| -rw-r--r-- | include/linux/ceph/libceph.h | 2 | ||||
| -rw-r--r-- | include/linux/ceph/messenger.h | 4 | ||||
| -rw-r--r-- | include/linux/ceph/msgr.h | 4 | ||||
| -rw-r--r-- | net/ceph/ceph_common.c | 1 | ||||
| -rw-r--r-- | net/ceph/messenger.c | 59 | ||||
| -rw-r--r-- | net/ceph/mon_client.c | 37 |
6 files changed, 93 insertions, 14 deletions
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 9ebee53d3bf5..397c5cd09794 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h | |||
| @@ -46,6 +46,7 @@ struct ceph_options { | |||
| 46 | unsigned long mount_timeout; /* jiffies */ | 46 | unsigned long mount_timeout; /* jiffies */ |
| 47 | unsigned long osd_idle_ttl; /* jiffies */ | 47 | unsigned long osd_idle_ttl; /* jiffies */ |
| 48 | unsigned long osd_keepalive_timeout; /* jiffies */ | 48 | unsigned long osd_keepalive_timeout; /* jiffies */ |
| 49 | unsigned long monc_ping_timeout; /* jiffies */ | ||
| 49 | 50 | ||
| 50 | /* | 51 | /* |
| 51 | * any type that can't be simply compared or doesn't need need | 52 | * any type that can't be simply compared or doesn't need need |
| @@ -66,6 +67,7 @@ struct ceph_options { | |||
| 66 | #define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) | 67 | #define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) |
| 67 | #define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) | 68 | #define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) |
| 68 | #define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) | 69 | #define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) |
| 70 | #define CEPH_MONC_PING_TIMEOUT_DEFAULT msecs_to_jiffies(30 * 1000) | ||
| 69 | 71 | ||
| 70 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) | 72 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) |
| 71 | #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) | 73 | #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) |
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 37753278987a..7e1252e97a30 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h | |||
| @@ -248,6 +248,8 @@ struct ceph_connection { | |||
| 248 | int in_base_pos; /* bytes read */ | 248 | int in_base_pos; /* bytes read */ |
| 249 | __le64 in_temp_ack; /* for reading an ack */ | 249 | __le64 in_temp_ack; /* for reading an ack */ |
| 250 | 250 | ||
| 251 | struct timespec last_keepalive_ack; | ||
| 252 | |||
| 251 | struct delayed_work work; /* send|recv work */ | 253 | struct delayed_work work; /* send|recv work */ |
| 252 | unsigned long delay; /* current delay interval */ | 254 | unsigned long delay; /* current delay interval */ |
| 253 | }; | 255 | }; |
| @@ -285,6 +287,8 @@ extern void ceph_msg_revoke(struct ceph_msg *msg); | |||
| 285 | extern void ceph_msg_revoke_incoming(struct ceph_msg *msg); | 287 | extern void ceph_msg_revoke_incoming(struct ceph_msg *msg); |
| 286 | 288 | ||
| 287 | extern void ceph_con_keepalive(struct ceph_connection *con); | 289 | extern void ceph_con_keepalive(struct ceph_connection *con); |
| 290 | extern bool ceph_con_keepalive_expired(struct ceph_connection *con, | ||
| 291 | unsigned long interval); | ||
| 288 | 292 | ||
| 289 | extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, | 293 | extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, |
| 290 | size_t length, size_t alignment); | 294 | size_t length, size_t alignment); |
diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h index 1c1887206ffa..0fe2656ac415 100644 --- a/include/linux/ceph/msgr.h +++ b/include/linux/ceph/msgr.h | |||
| @@ -84,10 +84,12 @@ struct ceph_entity_inst { | |||
| 84 | #define CEPH_MSGR_TAG_MSG 7 /* message */ | 84 | #define CEPH_MSGR_TAG_MSG 7 /* message */ |
| 85 | #define CEPH_MSGR_TAG_ACK 8 /* message ack */ | 85 | #define CEPH_MSGR_TAG_ACK 8 /* message ack */ |
| 86 | #define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! */ | 86 | #define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! */ |
| 87 | #define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */ | 87 | #define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */ |
| 88 | #define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */ | 88 | #define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */ |
| 89 | #define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */ | 89 | #define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */ |
| 90 | #define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */ | 90 | #define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */ |
| 91 | #define CEPH_MSGR_TAG_KEEPALIVE2 14 /* keepalive2 byte + ceph_timespec */ | ||
| 92 | #define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */ | ||
| 91 | 93 | ||
| 92 | 94 | ||
| 93 | /* | 95 | /* |
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index f30329f72641..3f56eefc2a07 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c | |||
| @@ -357,6 +357,7 @@ ceph_parse_options(char *options, const char *dev_name, | |||
| 357 | opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; | 357 | opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; |
| 358 | opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; | 358 | opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; |
| 359 | opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; | 359 | opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; |
| 360 | opt->monc_ping_timeout = CEPH_MONC_PING_TIMEOUT_DEFAULT; | ||
| 360 | 361 | ||
| 361 | /* get mon ip(s) */ | 362 | /* get mon ip(s) */ |
| 362 | /* ip1[:port1][,ip2[:port2]...] */ | 363 | /* ip1[:port1][,ip2[:port2]...] */ |
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 101ab6285fba..36757d46ac40 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c | |||
| @@ -163,6 +163,7 @@ static struct kmem_cache *ceph_msg_data_cache; | |||
| 163 | static char tag_msg = CEPH_MSGR_TAG_MSG; | 163 | static char tag_msg = CEPH_MSGR_TAG_MSG; |
| 164 | static char tag_ack = CEPH_MSGR_TAG_ACK; | 164 | static char tag_ack = CEPH_MSGR_TAG_ACK; |
| 165 | static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; | 165 | static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; |
| 166 | static char tag_keepalive2 = CEPH_MSGR_TAG_KEEPALIVE2; | ||
| 166 | 167 | ||
| 167 | #ifdef CONFIG_LOCKDEP | 168 | #ifdef CONFIG_LOCKDEP |
| 168 | static struct lock_class_key socket_class; | 169 | static struct lock_class_key socket_class; |
| @@ -1351,7 +1352,15 @@ static void prepare_write_keepalive(struct ceph_connection *con) | |||
| 1351 | { | 1352 | { |
| 1352 | dout("prepare_write_keepalive %p\n", con); | 1353 | dout("prepare_write_keepalive %p\n", con); |
| 1353 | con_out_kvec_reset(con); | 1354 | con_out_kvec_reset(con); |
| 1354 | con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive); | 1355 | if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) { |
| 1356 | struct timespec ts = CURRENT_TIME; | ||
| 1357 | struct ceph_timespec ceph_ts; | ||
| 1358 | ceph_encode_timespec(&ceph_ts, &ts); | ||
| 1359 | con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2); | ||
| 1360 | con_out_kvec_add(con, sizeof(ceph_ts), &ceph_ts); | ||
| 1361 | } else { | ||
| 1362 | con_out_kvec_add(con, sizeof(tag_keepalive), &tag_keepalive); | ||
| 1363 | } | ||
| 1355 | con_flag_set(con, CON_FLAG_WRITE_PENDING); | 1364 | con_flag_set(con, CON_FLAG_WRITE_PENDING); |
| 1356 | } | 1365 | } |
| 1357 | 1366 | ||
| @@ -1625,6 +1634,12 @@ static void prepare_read_tag(struct ceph_connection *con) | |||
| 1625 | con->in_tag = CEPH_MSGR_TAG_READY; | 1634 | con->in_tag = CEPH_MSGR_TAG_READY; |
| 1626 | } | 1635 | } |
| 1627 | 1636 | ||
| 1637 | static void prepare_read_keepalive_ack(struct ceph_connection *con) | ||
| 1638 | { | ||
| 1639 | dout("prepare_read_keepalive_ack %p\n", con); | ||
| 1640 | con->in_base_pos = 0; | ||
| 1641 | } | ||
| 1642 | |||
| 1628 | /* | 1643 | /* |
| 1629 | * Prepare to read a message. | 1644 | * Prepare to read a message. |
| 1630 | */ | 1645 | */ |
| @@ -2457,6 +2472,17 @@ static void process_message(struct ceph_connection *con) | |||
| 2457 | mutex_lock(&con->mutex); | 2472 | mutex_lock(&con->mutex); |
| 2458 | } | 2473 | } |
| 2459 | 2474 | ||
| 2475 | static int read_keepalive_ack(struct ceph_connection *con) | ||
| 2476 | { | ||
| 2477 | struct ceph_timespec ceph_ts; | ||
| 2478 | size_t size = sizeof(ceph_ts); | ||
| 2479 | int ret = read_partial(con, size, size, &ceph_ts); | ||
| 2480 | if (ret <= 0) | ||
| 2481 | return ret; | ||
| 2482 | ceph_decode_timespec(&con->last_keepalive_ack, &ceph_ts); | ||
| 2483 | prepare_read_tag(con); | ||
| 2484 | return 1; | ||
| 2485 | } | ||
| 2460 | 2486 | ||
| 2461 | /* | 2487 | /* |
| 2462 | * Write something to the socket. Called in a worker thread when the | 2488 | * Write something to the socket. Called in a worker thread when the |
| @@ -2526,6 +2552,10 @@ more_kvec: | |||
| 2526 | 2552 | ||
| 2527 | do_next: | 2553 | do_next: |
| 2528 | if (con->state == CON_STATE_OPEN) { | 2554 | if (con->state == CON_STATE_OPEN) { |
| 2555 | if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) { | ||
| 2556 | prepare_write_keepalive(con); | ||
| 2557 | goto more; | ||
| 2558 | } | ||
| 2529 | /* is anything else pending? */ | 2559 | /* is anything else pending? */ |
| 2530 | if (!list_empty(&con->out_queue)) { | 2560 | if (!list_empty(&con->out_queue)) { |
| 2531 | prepare_write_message(con); | 2561 | prepare_write_message(con); |
| @@ -2535,10 +2565,6 @@ do_next: | |||
| 2535 | prepare_write_ack(con); | 2565 | prepare_write_ack(con); |
| 2536 | goto more; | 2566 | goto more; |
| 2537 | } | 2567 | } |
| 2538 | if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) { | ||
| 2539 | prepare_write_keepalive(con); | ||
| 2540 | goto more; | ||
| 2541 | } | ||
| 2542 | } | 2568 | } |
| 2543 | 2569 | ||
| 2544 | /* Nothing to do! */ | 2570 | /* Nothing to do! */ |
| @@ -2641,6 +2667,9 @@ more: | |||
| 2641 | case CEPH_MSGR_TAG_ACK: | 2667 | case CEPH_MSGR_TAG_ACK: |
| 2642 | prepare_read_ack(con); | 2668 | prepare_read_ack(con); |
| 2643 | break; | 2669 | break; |
| 2670 | case CEPH_MSGR_TAG_KEEPALIVE2_ACK: | ||
| 2671 | prepare_read_keepalive_ack(con); | ||
| 2672 | break; | ||
| 2644 | case CEPH_MSGR_TAG_CLOSE: | 2673 | case CEPH_MSGR_TAG_CLOSE: |
| 2645 | con_close_socket(con); | 2674 | con_close_socket(con); |
| 2646 | con->state = CON_STATE_CLOSED; | 2675 | con->state = CON_STATE_CLOSED; |
| @@ -2684,6 +2713,12 @@ more: | |||
| 2684 | process_ack(con); | 2713 | process_ack(con); |
| 2685 | goto more; | 2714 | goto more; |
| 2686 | } | 2715 | } |
| 2716 | if (con->in_tag == CEPH_MSGR_TAG_KEEPALIVE2_ACK) { | ||
| 2717 | ret = read_keepalive_ack(con); | ||
| 2718 | if (ret <= 0) | ||
| 2719 | goto out; | ||
| 2720 | goto more; | ||
| 2721 | } | ||
| 2687 | 2722 | ||
| 2688 | out: | 2723 | out: |
| 2689 | dout("try_read done on %p ret %d\n", con, ret); | 2724 | dout("try_read done on %p ret %d\n", con, ret); |
| @@ -3101,6 +3136,20 @@ void ceph_con_keepalive(struct ceph_connection *con) | |||
| 3101 | } | 3136 | } |
| 3102 | EXPORT_SYMBOL(ceph_con_keepalive); | 3137 | EXPORT_SYMBOL(ceph_con_keepalive); |
| 3103 | 3138 | ||
| 3139 | bool ceph_con_keepalive_expired(struct ceph_connection *con, | ||
| 3140 | unsigned long interval) | ||
| 3141 | { | ||
| 3142 | if (interval > 0 && | ||
| 3143 | (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2)) { | ||
| 3144 | struct timespec now = CURRENT_TIME; | ||
| 3145 | struct timespec ts; | ||
| 3146 | jiffies_to_timespec(interval, &ts); | ||
| 3147 | ts = timespec_add(con->last_keepalive_ack, ts); | ||
| 3148 | return timespec_compare(&now, &ts) >= 0; | ||
| 3149 | } | ||
| 3150 | return false; | ||
| 3151 | } | ||
| 3152 | |||
| 3104 | static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type) | 3153 | static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type) |
| 3105 | { | 3154 | { |
| 3106 | struct ceph_msg_data *data; | 3155 | struct ceph_msg_data *data; |
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 9d6ff1215928..edda01626a45 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c | |||
| @@ -149,6 +149,10 @@ static int __open_session(struct ceph_mon_client *monc) | |||
| 149 | CEPH_ENTITY_TYPE_MON, monc->cur_mon, | 149 | CEPH_ENTITY_TYPE_MON, monc->cur_mon, |
| 150 | &monc->monmap->mon_inst[monc->cur_mon].addr); | 150 | &monc->monmap->mon_inst[monc->cur_mon].addr); |
| 151 | 151 | ||
| 152 | /* send an initial keepalive to ensure our timestamp is | ||
| 153 | * valid by the time we are in an OPENED state */ | ||
| 154 | ceph_con_keepalive(&monc->con); | ||
| 155 | |||
| 152 | /* initiatiate authentication handshake */ | 156 | /* initiatiate authentication handshake */ |
| 153 | ret = ceph_auth_build_hello(monc->auth, | 157 | ret = ceph_auth_build_hello(monc->auth, |
| 154 | monc->m_auth->front.iov_base, | 158 | monc->m_auth->front.iov_base, |
| @@ -170,14 +174,19 @@ static bool __sub_expired(struct ceph_mon_client *monc) | |||
| 170 | */ | 174 | */ |
| 171 | static void __schedule_delayed(struct ceph_mon_client *monc) | 175 | static void __schedule_delayed(struct ceph_mon_client *monc) |
| 172 | { | 176 | { |
| 173 | unsigned int delay; | 177 | struct ceph_options *opt = monc->client->options; |
| 178 | unsigned long delay; | ||
| 174 | 179 | ||
| 175 | if (monc->cur_mon < 0 || __sub_expired(monc)) | 180 | if (monc->cur_mon < 0 || __sub_expired(monc)) { |
| 176 | delay = 10 * HZ; | 181 | delay = 10 * HZ; |
| 177 | else | 182 | } else { |
| 178 | delay = 20 * HZ; | 183 | delay = 20 * HZ; |
| 179 | dout("__schedule_delayed after %u\n", delay); | 184 | if (opt->monc_ping_timeout > 0) |
| 180 | schedule_delayed_work(&monc->delayed_work, delay); | 185 | delay = min(delay, opt->monc_ping_timeout / 3); |
| 186 | } | ||
| 187 | dout("__schedule_delayed after %lu\n", delay); | ||
| 188 | schedule_delayed_work(&monc->delayed_work, | ||
| 189 | round_jiffies_relative(delay)); | ||
| 181 | } | 190 | } |
| 182 | 191 | ||
| 183 | /* | 192 | /* |
| @@ -743,11 +752,23 @@ static void delayed_work(struct work_struct *work) | |||
| 743 | __close_session(monc); | 752 | __close_session(monc); |
| 744 | __open_session(monc); /* continue hunting */ | 753 | __open_session(monc); /* continue hunting */ |
| 745 | } else { | 754 | } else { |
| 746 | ceph_con_keepalive(&monc->con); | 755 | struct ceph_options *opt = monc->client->options; |
| 756 | int is_auth = ceph_auth_is_authenticated(monc->auth); | ||
| 757 | if (ceph_con_keepalive_expired(&monc->con, | ||
| 758 | opt->monc_ping_timeout)) { | ||
| 759 | dout("monc keepalive timeout\n"); | ||
| 760 | is_auth = 0; | ||
| 761 | __close_session(monc); | ||
| 762 | monc->hunting = true; | ||
| 763 | __open_session(monc); | ||
| 764 | } | ||
| 747 | 765 | ||
| 748 | __validate_auth(monc); | 766 | if (!monc->hunting) { |
| 767 | ceph_con_keepalive(&monc->con); | ||
| 768 | __validate_auth(monc); | ||
| 769 | } | ||
| 749 | 770 | ||
| 750 | if (ceph_auth_is_authenticated(monc->auth)) | 771 | if (is_auth) |
| 751 | __send_subscribe(monc); | 772 | __send_subscribe(monc); |
| 752 | } | 773 | } |
| 753 | __schedule_delayed(monc); | 774 | __schedule_delayed(monc); |
