summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIlya Dryomov <idryomov@gmail.com>2016-01-19 10:19:06 -0500
committerIlya Dryomov <idryomov@gmail.com>2016-03-25 13:51:38 -0400
commit82dcabad750a36a2b749889bc89c5a3188775b2e (patch)
tree26fcfa10f290b53b34ab5c3019338ca2da3a9569
parent0f9af169a1db62c33d87e4cfda46493907bd5537 (diff)
libceph: revamp subs code, switch to SUBSCRIBE2 protocol
It is currently hard-coded in the mon_client that mdsmap and monmap subs are continuous, while osdmap sub is always "onetime". To better handle full clusters/pools in the osd_client, we need to be able to issue continuous osdmap subs. Revamp subs code to allow us to specify for each sub whether it should be continuous or not. Although not strictly required for the above, switch to SUBSCRIBE2 protocol while at it, eliminating the ambiguity between a request for "every map since X" and a request for "just the latest" when we don't have a map yet (i.e. have epoch 0). SUBSCRIBE2 feature bit is now required - it's been supported since pre-argonaut (2010). Move "got mdsmap" call to the end of ceph_mdsc_handle_map() - calling in before we validate the epoch and successfully install the new map can mess up mon_client sub state. Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
-rw-r--r--fs/ceph/mds_client.c3
-rw-r--r--fs/ceph/super.c2
-rw-r--r--include/linux/ceph/ceph_features.h2
-rw-r--r--include/linux/ceph/ceph_fs.h4
-rw-r--r--include/linux/ceph/mon_client.h28
-rw-r--r--net/ceph/debugfs.c17
-rw-r--r--net/ceph/mon_client.c210
-rw-r--r--net/ceph/osd_client.c3
8 files changed, 174 insertions, 95 deletions
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 911d64d865f1..b43399d22e23 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3764,7 +3764,6 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
 	dout("handle_map epoch %u len %d\n", epoch, (int)maplen);

 	/* do we need it? */
-	ceph_monc_got_mdsmap(&mdsc->fsc->client->monc, epoch);
 	mutex_lock(&mdsc->mutex);
 	if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) {
 		dout("handle_map epoch %u <= our %u\n",
@@ -3791,6 +3790,8 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
 	mdsc->fsc->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size;

 	__wake_requests(mdsc, &mdsc->waiting_for_map);
+	ceph_monc_got_map(&mdsc->fsc->client->monc, CEPH_SUB_MDSMAP,
+			  mdsc->mdsmap->m_epoch);

 	mutex_unlock(&mdsc->mutex);
 	schedule_delayed(mdsc);
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index ca4d5e8457f1..c941fd1a8eb8 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -530,7 +530,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 		goto fail;
 	}
 	fsc->client->extra_mon_dispatch = extra_mon_dispatch;
-	fsc->client->monc.want_mdsmap = 1;
+	ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP, 0, true);

 	fsc->mount_options = fsopt;

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 15151f3c4120..ae2f66833762 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -105,6 +105,7 @@ static inline u64 ceph_sanitize_features(u64 features)
  */
 #define CEPH_FEATURES_SUPPORTED_DEFAULT		\
 	(CEPH_FEATURE_NOSRCADDR |		\
+	 CEPH_FEATURE_SUBSCRIBE2 |		\
 	 CEPH_FEATURE_RECONNECT_SEQ |		\
 	 CEPH_FEATURE_PGID64 |			\
	 CEPH_FEATURE_PGPOOL3 |			\
@@ -127,6 +128,7 @@ static inline u64 ceph_sanitize_features(u64 features)

 #define CEPH_FEATURES_REQUIRED_DEFAULT		\
 	(CEPH_FEATURE_NOSRCADDR |		\
+	 CEPH_FEATURE_SUBSCRIBE2 |		\
 	 CEPH_FEATURE_RECONNECT_SEQ |		\
 	 CEPH_FEATURE_PGID64 |			\
	 CEPH_FEATURE_PGPOOL3 |			\
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index d7d072a25c27..bf74005eedec 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -198,8 +198,8 @@ struct ceph_client_mount {
 #define CEPH_SUBSCRIBE_ONETIME  1  /* i want only 1 update after have */

 struct ceph_mon_subscribe_item {
-	__le64 have_version;	__le64 have;
-	__u8 onetime;
+	__le64 start;
+	__u8 flags;
 } __attribute__ ((packed));

 struct ceph_mon_subscribe_ack {
diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
index 81810dc21f06..8b2d2f0b659e 100644
--- a/include/linux/ceph/mon_client.h
+++ b/include/linux/ceph/mon_client.h
@@ -68,7 +68,8 @@ struct ceph_mon_client {

 	bool hunting;
 	int cur_mon;                       /* last monitor i contacted */
-	unsigned long sub_sent, sub_renew_after;
+	unsigned long sub_renew_after;
+	unsigned long sub_renew_sent;
 	struct ceph_connection con;

 	/* pending generic requests */
@@ -76,10 +77,12 @@ struct ceph_mon_client {
 	int num_generic_requests;
 	u64 last_tid;

-	/* mds/osd map */
-	int want_mdsmap;
-	int want_next_osdmap; /* 1 = want, 2 = want+asked */
-	u32 have_osdmap, have_mdsmap;
+	/* subs, indexed with CEPH_SUB_* */
+	struct {
+		struct ceph_mon_subscribe_item item;
+		bool want;
+		u32 have; /* epoch */
+	} subs[3];

 #ifdef CONFIG_DEBUG_FS
 	struct dentry *debugfs_file;
@@ -93,14 +96,23 @@ extern int ceph_monmap_contains(struct ceph_monmap *m,
 extern int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl);
 extern void ceph_monc_stop(struct ceph_mon_client *monc);

+enum {
+	CEPH_SUB_MDSMAP = 0,
+	CEPH_SUB_MONMAP,
+	CEPH_SUB_OSDMAP,
+};
+
+extern const char *ceph_sub_str[];
+
 /*
  * The model here is to indicate that we need a new map of at least
- * epoch @want, and also call in when we receive a map.  We will
+ * epoch @epoch, and also call in when we receive a map.  We will
  * periodically rerequest the map from the monitor cluster until we
  * get what we want.
  */
-extern int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 have);
-extern int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 have);
+bool ceph_monc_want_map(struct ceph_mon_client *monc, int sub, u32 epoch,
+			bool continuous);
+void ceph_monc_got_map(struct ceph_mon_client *monc, int sub, u32 epoch);

 extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc);
 extern int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 593dc2eabcc8..b902fbc7863e 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -112,15 +112,20 @@ static int monc_show(struct seq_file *s, void *p)
 	struct ceph_mon_generic_request *req;
 	struct ceph_mon_client *monc = &client->monc;
 	struct rb_node *rp;
+	int i;

 	mutex_lock(&monc->mutex);

-	if (monc->have_mdsmap)
-		seq_printf(s, "have mdsmap %u\n", (unsigned int)monc->have_mdsmap);
-	if (monc->have_osdmap)
-		seq_printf(s, "have osdmap %u\n", (unsigned int)monc->have_osdmap);
-	if (monc->want_next_osdmap)
-		seq_printf(s, "want next osdmap\n");
+	for (i = 0; i < ARRAY_SIZE(monc->subs); i++) {
+		seq_printf(s, "have %s %u", ceph_sub_str[i],
+			   monc->subs[i].have);
+		if (monc->subs[i].want)
+			seq_printf(s, " want %llu%s",
+				   le64_to_cpu(monc->subs[i].item.start),
+				   (monc->subs[i].item.flags &
+					CEPH_SUBSCRIBE_ONETIME ? "" : "+"));
+		seq_putc(s, '\n');
+	}

 	for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) {
 		__u16 op;
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index d6af6ca26e8d..89029916315c 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -140,9 +140,8 @@ static int __open_session(struct ceph_mon_client *monc)
 		monc->cur_mon = r % monc->monmap->num_mon;
 		dout("open_session num=%d r=%d -> mon%d\n",
 		     monc->monmap->num_mon, r, monc->cur_mon);
-		monc->sub_sent = 0;
 		monc->sub_renew_after = jiffies; /* i.e., expired */
-		monc->want_next_osdmap = !!monc->want_next_osdmap;
+		monc->sub_renew_sent = 0;

 		dout("open_session mon%d opening\n", monc->cur_mon);
 		ceph_con_open(&monc->con,
@@ -189,59 +188,58 @@ static void __schedule_delayed(struct ceph_mon_client *monc)
 			round_jiffies_relative(delay));
 }

+const char *ceph_sub_str[] = {
+	[CEPH_SUB_MDSMAP] = "mdsmap",
+	[CEPH_SUB_MONMAP] = "monmap",
+	[CEPH_SUB_OSDMAP] = "osdmap",
+};
+
 /*
- * Send subscribe request for mdsmap and/or osdmap.
+ * Send subscribe request for one or more maps, according to
+ * monc->subs.
  */
 static void __send_subscribe(struct ceph_mon_client *monc)
 {
-	dout("__send_subscribe sub_sent=%u exp=%u want_osd=%d\n",
-	     (unsigned int)monc->sub_sent, __sub_expired(monc),
-	     monc->want_next_osdmap);
-	if ((__sub_expired(monc) && !monc->sub_sent) ||
-	    monc->want_next_osdmap == 1) {
-		struct ceph_msg *msg = monc->m_subscribe;
-		struct ceph_mon_subscribe_item *i;
-		void *p, *end;
-		int num;
-
-		p = msg->front.iov_base;
-		end = p + msg->front_alloc_len;
-
-		num = 1 + !!monc->want_next_osdmap + !!monc->want_mdsmap;
-		ceph_encode_32(&p, num);
-
-		if (monc->want_next_osdmap) {
-			dout("__send_subscribe to 'osdmap' %u\n",
-			     (unsigned int)monc->have_osdmap);
-			ceph_encode_string(&p, end, "osdmap", 6);
-			i = p;
-			i->have = cpu_to_le64(monc->have_osdmap);
-			i->onetime = 1;
-			p += sizeof(*i);
-			monc->want_next_osdmap = 2;  /* requested */
-		}
-		if (monc->want_mdsmap) {
-			dout("__send_subscribe to 'mdsmap' %u+\n",
-			     (unsigned int)monc->have_mdsmap);
-			ceph_encode_string(&p, end, "mdsmap", 6);
-			i = p;
-			i->have = cpu_to_le64(monc->have_mdsmap);
-			i->onetime = 0;
-			p += sizeof(*i);
-		}
-		ceph_encode_string(&p, end, "monmap", 6);
-		i = p;
-		i->have = 0;
-		i->onetime = 0;
-		p += sizeof(*i);
-
-		msg->front.iov_len = p - msg->front.iov_base;
-		msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
-		ceph_msg_revoke(msg);
-		ceph_con_send(&monc->con, ceph_msg_get(msg));
-
-		monc->sub_sent = jiffies | 1;  /* never 0 */
+	struct ceph_msg *msg = monc->m_subscribe;
+	void *p = msg->front.iov_base;
+	void *const end = p + msg->front_alloc_len;
+	int num = 0;
+	int i;
+
+	dout("%s sent %lu\n", __func__, monc->sub_renew_sent);
+
+	BUG_ON(monc->cur_mon < 0);
+
+	if (!monc->sub_renew_sent)
+		monc->sub_renew_sent = jiffies | 1; /* never 0 */
+
+	msg->hdr.version = cpu_to_le16(2);
+
+	for (i = 0; i < ARRAY_SIZE(monc->subs); i++) {
+		if (monc->subs[i].want)
+			num++;
 	}
+	BUG_ON(num < 1); /* monmap sub is always there */
+	ceph_encode_32(&p, num);
+	for (i = 0; i < ARRAY_SIZE(monc->subs); i++) {
+		const char *s = ceph_sub_str[i];
+
+		if (!monc->subs[i].want)
+			continue;
+
+		dout("%s %s start %llu flags 0x%x\n", __func__, s,
+		     le64_to_cpu(monc->subs[i].item.start),
+		     monc->subs[i].item.flags);
+		ceph_encode_string(&p, end, s, strlen(s));
+		memcpy(p, &monc->subs[i].item, sizeof(monc->subs[i].item));
+		p += sizeof(monc->subs[i].item);
+	}
+
+	BUG_ON(p != (end - 35 - (ARRAY_SIZE(monc->subs) - num) * 19));
+	msg->front.iov_len = p - msg->front.iov_base;
+	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+	ceph_msg_revoke(msg);
+	ceph_con_send(&monc->con, ceph_msg_get(msg));
 }

 static void handle_subscribe_ack(struct ceph_mon_client *monc,
@@ -255,9 +253,16 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc,
 	seconds = le32_to_cpu(h->duration);

 	mutex_lock(&monc->mutex);
-	dout("handle_subscribe_ack after %d seconds\n", seconds);
-	monc->sub_renew_after = monc->sub_sent + (seconds >> 1)*HZ - 1;
-	monc->sub_sent = 0;
+	if (monc->sub_renew_sent) {
+		monc->sub_renew_after = monc->sub_renew_sent +
+					    (seconds >> 1) * HZ - 1;
+		dout("%s sent %lu duration %d renew after %lu\n", __func__,
+		     monc->sub_renew_sent, seconds, monc->sub_renew_after);
+		monc->sub_renew_sent = 0;
+	} else {
+		dout("%s sent %lu renew after %lu, ignoring\n", __func__,
+		     monc->sub_renew_sent, monc->sub_renew_after);
+	}
 	mutex_unlock(&monc->mutex);
 	return;
 bad:
@@ -266,36 +271,82 @@ bad:
 }

 /*
- * Keep track of which maps we have
+ * Register interest in a map
+ *
+ * @sub: one of CEPH_SUB_*
+ * @epoch: X for "every map since X", or 0 for "just the latest"
  */
-int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 got)
+static bool __ceph_monc_want_map(struct ceph_mon_client *monc, int sub,
+				 u32 epoch, bool continuous)
+{
+	__le64 start = cpu_to_le64(epoch);
+	u8 flags = !continuous ? CEPH_SUBSCRIBE_ONETIME : 0;
+
+	dout("%s %s epoch %u continuous %d\n", __func__, ceph_sub_str[sub],
+	     epoch, continuous);
+
+	if (monc->subs[sub].want &&
+	    monc->subs[sub].item.start == start &&
+	    monc->subs[sub].item.flags == flags)
+		return false;
+
+	monc->subs[sub].item.start = start;
+	monc->subs[sub].item.flags = flags;
+	monc->subs[sub].want = true;
+
+	return true;
+}
+
+bool ceph_monc_want_map(struct ceph_mon_client *monc, int sub, u32 epoch,
+			bool continuous)
 {
+	bool need_request;
+
 	mutex_lock(&monc->mutex);
-	monc->have_mdsmap = got;
+	need_request = __ceph_monc_want_map(monc, sub, epoch, continuous);
 	mutex_unlock(&monc->mutex);
-	return 0;
+
+	return need_request;
 }
-EXPORT_SYMBOL(ceph_monc_got_mdsmap);
+EXPORT_SYMBOL(ceph_monc_want_map);

-int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 got)
+/*
+ * Keep track of which maps we have
+ *
+ * @sub: one of CEPH_SUB_*
+ */
+static void __ceph_monc_got_map(struct ceph_mon_client *monc, int sub,
+				u32 epoch)
+{
+	dout("%s %s epoch %u\n", __func__, ceph_sub_str[sub], epoch);
+
+	if (monc->subs[sub].want) {
+		if (monc->subs[sub].item.flags & CEPH_SUBSCRIBE_ONETIME)
+			monc->subs[sub].want = false;
+		else
+			monc->subs[sub].item.start = cpu_to_le64(epoch + 1);
+	}
+
+	monc->subs[sub].have = epoch;
+}
+
+void ceph_monc_got_map(struct ceph_mon_client *monc, int sub, u32 epoch)
 {
 	mutex_lock(&monc->mutex);
-	monc->have_osdmap = got;
-	monc->want_next_osdmap = 0;
+	__ceph_monc_got_map(monc, sub, epoch);
 	mutex_unlock(&monc->mutex);
-	return 0;
 }
+EXPORT_SYMBOL(ceph_monc_got_map);

 /*
  * Register interest in the next osdmap
  */
 void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc)
 {
-	dout("request_next_osdmap have %u\n", monc->have_osdmap);
+	dout("%s have %u\n", __func__, monc->subs[CEPH_SUB_OSDMAP].have);
 	mutex_lock(&monc->mutex);
-	if (!monc->want_next_osdmap)
-		monc->want_next_osdmap = 1;
-	if (monc->want_next_osdmap < 2)
+	if (__ceph_monc_want_map(monc, CEPH_SUB_OSDMAP,
+				 monc->subs[CEPH_SUB_OSDMAP].have + 1, false))
 		__send_subscribe(monc);
 	mutex_unlock(&monc->mutex);
 }
@@ -314,15 +365,15 @@ int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
 	long ret;

 	mutex_lock(&monc->mutex);
-	while (monc->have_osdmap < epoch) {
+	while (monc->subs[CEPH_SUB_OSDMAP].have < epoch) {
 		mutex_unlock(&monc->mutex);

 		if (timeout && time_after_eq(jiffies, started + timeout))
 			return -ETIMEDOUT;

 		ret = wait_event_interruptible_timeout(monc->client->auth_wq,
-					 monc->have_osdmap >= epoch,
+					 monc->subs[CEPH_SUB_OSDMAP].have >= epoch,
 					 ceph_timeout_jiffies(timeout));
 		if (ret < 0)
 			return ret;

@@ -335,11 +386,14 @@ int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
 EXPORT_SYMBOL(ceph_monc_wait_osdmap);

 /*
- *
+ * Open a session with a random monitor.  Request monmap and osdmap,
+ * which are waited upon in __ceph_open_session().
  */
 int ceph_monc_open_session(struct ceph_mon_client *monc)
 {
 	mutex_lock(&monc->mutex);
+	__ceph_monc_want_map(monc, CEPH_SUB_MONMAP, 0, true);
+	__ceph_monc_want_map(monc, CEPH_SUB_OSDMAP, 0, false);
 	__open_session(monc);
 	__schedule_delayed(monc);
 	mutex_unlock(&monc->mutex);
@@ -375,6 +429,7 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
 	client->monc.monmap = monmap;
 	kfree(old);

+	__ceph_monc_got_map(monc, CEPH_SUB_MONMAP, monc->monmap->epoch);
 	client->have_fsid = true;

 out:
@@ -725,8 +780,14 @@ static void delayed_work(struct work_struct *work)
 			__validate_auth(monc);
 		}

-		if (is_auth)
-			__send_subscribe(monc);
+		if (is_auth) {
+			unsigned long now = jiffies;
+
+			dout("%s renew subs? now %lu renew after %lu\n",
+			     __func__, now, monc->sub_renew_after);
+			if (time_after_eq(now, monc->sub_renew_after))
+				__send_subscribe(monc);
+		}
 	}
 	__schedule_delayed(monc);
 	mutex_unlock(&monc->mutex);
@@ -815,16 +876,13 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
 	monc->cur_mon = -1;
 	monc->hunting = true;
 	monc->sub_renew_after = jiffies;
-	monc->sub_sent = 0;
+	monc->sub_renew_sent = 0;

 	INIT_DELAYED_WORK(&monc->delayed_work, delayed_work);
 	monc->generic_request_tree = RB_ROOT;
 	monc->num_generic_requests = 0;
 	monc->last_tid = 0;

-	monc->have_mdsmap = 0;
-	monc->have_osdmap = 0;
-	monc->want_next_osdmap = 1;
 	return 0;

 out_auth_reply:
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 5bc053778fed..3309112e23d0 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -2187,7 +2187,8 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
 		goto bad;
 done:
 	downgrade_write(&osdc->map_sem);
-	ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch);
+	ceph_monc_got_map(&osdc->client->monc, CEPH_SUB_OSDMAP,
+			  osdc->osdmap->epoch);

 	/*
 	 * subscribe to subsequent osdmap updates if full to ensure