Diffstat:
 Documentation/filesystems/00-INDEX |  2
 Documentation/filesystems/ceph.txt | 11
 MAINTAINERS                        |  2
 fs/ceph/addr.c                     | 10
 fs/ceph/auth_x.c                   | 53
 fs/ceph/caps.c                     | 73
 fs/ceph/dir.c                      |  4
 fs/ceph/inode.c                    | 16
 fs/ceph/mds_client.c               | 43
 fs/ceph/messenger.c                | 19
 fs/ceph/messenger.h                |  1
 fs/ceph/osd_client.c               | 29
 fs/ceph/osd_client.h               |  2
 fs/ceph/osdmap.c                   | 17
 fs/ceph/snap.c                     |  6
 15 files changed, 191 insertions(+), 97 deletions(-)
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
index 3bae418c6ad..4303614b5ad 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -16,6 +16,8 @@ befs.txt
 	- information about the BeOS filesystem for Linux.
 bfs.txt
 	- info for the SCO UnixWare Boot Filesystem (BFS).
+ceph.txt
+	- info for the Ceph Distributed File System
 cifs.txt
 	- description of the CIFS filesystem.
 coda.txt
diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph.txt
index 6e03917316b..0660c9f5dee 100644
--- a/Documentation/filesystems/ceph.txt
+++ b/Documentation/filesystems/ceph.txt
@@ -8,7 +8,7 @@ Basic features include:
 
  * POSIX semantics
  * Seamless scaling from 1 to many thousands of nodes
- * High availability and reliability. No single points of failure.
+ * High availability and reliability. No single point of failure.
  * N-way replication of data across storage nodes
  * Fast recovery from node failures
  * Automatic rebalancing of data on node addition/removal
@@ -94,7 +94,7 @@ Mount Options
 
   wsize=X
 	Specify the maximum write size in bytes. By default there is no
-	maximu. Ceph will normally size writes based on the file stripe
+	maximum. Ceph will normally size writes based on the file stripe
 	size.
 
   rsize=X
@@ -115,7 +115,7 @@ Mount Options
 	number of entries in that directory.
 
   nocrc
-	Disable CRC32C calculation for data writes. If set, the OSD
+	Disable CRC32C calculation for data writes. If set, the storage node
 	must rely on TCP's error correction to detect data corruption
 	in the data payload.
 
@@ -133,7 +133,8 @@ For more information on Ceph, see the home page at
 	http://ceph.newdream.net/
 
 The Linux kernel client source tree is available at
-	git://ceph.newdream.net/linux-ceph-client.git
+	git://ceph.newdream.net/git/ceph-client.git
+	git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
 
 and the source for the full system is at
-	git://ceph.newdream.net/ceph.git
+	git://ceph.newdream.net/git/ceph.git
diff --git a/MAINTAINERS b/MAINTAINERS
index 1a203f9626f..088bd41ac71 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1443,7 +1443,7 @@ F: arch/powerpc/platforms/cell/
 
 CEPH DISTRIBUTED FILE SYSTEM CLIENT
 M:	Sage Weil <sage@newdream.net>
-L:	ceph-devel@lists.sourceforge.net
+L:	ceph-devel@vger.kernel.org
 W:	http://ceph.newdream.net/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
 S:	Supported
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 23bb0ceabe3..ce8ef610772 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -919,6 +919,10 @@ static int context_is_writeable_or_written(struct inode *inode,
 /*
  * We are only allowed to write into/dirty the page if the page is
  * clean, or already dirty within the same snap context.
+ *
+ * called with page locked.
+ * return success with page locked,
+ * or any failure (incl -EAGAIN) with page unlocked.
  */
 static int ceph_update_writeable_page(struct file *file,
 			    loff_t pos, unsigned len,
@@ -961,9 +965,11 @@ retry_locked:
 			snapc = ceph_get_snap_context((void *)page->private);
 			unlock_page(page);
 			ceph_queue_writeback(inode);
-			wait_event_interruptible(ci->i_cap_wq,
+			r = wait_event_interruptible(ci->i_cap_wq,
 			       context_is_writeable_or_written(inode, snapc));
 			ceph_put_snap_context(snapc);
+			if (r == -ERESTARTSYS)
+				return r;
 			return -EAGAIN;
 		}
 
@@ -1035,7 +1041,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
 	int r;
 
 	do {
-		/* get a page*/
+		/* get a page */
 		page = grab_cache_page_write_begin(mapping, index, 0);
 		if (!page)
 			return -ENOMEM;
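
The new comment pins down a subtle contract: ceph_update_writeable_page() is entered with the page locked, returns with it still locked on success, and unlocks it on any failure, including -EAGAIN, which is why the ceph_write_begin() loop can retry without tracking lock state. A minimal userspace sketch of the same contract (hypothetical names; a pthread mutex stands in for the page lock):

    #include <errno.h>
    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;
    static int attempts;

    /* Entered locked; keeps the lock on success, drops it on failure. */
    static int update_writeable_page(void)
    {
        if (++attempts < 3) {                    /* pretend the snap context is busy */
            pthread_mutex_unlock(&page_lock);    /* every failure path unlocks */
            return -EAGAIN;
        }
        return 0;                                /* success: still locked */
    }

    int main(void)
    {
        int r;

        do {
            pthread_mutex_lock(&page_lock);      /* "grab the page" locked */
            r = update_writeable_page();
        } while (r == -EAGAIN);
        printf("writeable after %d attempts\n", attempts);
        pthread_mutex_unlock(&page_lock);
        return 0;
    }
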
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index f0318427b6d..8d8a8496476 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -28,6 +28,12 @@ static int ceph_x_is_authenticated(struct ceph_auth_client *ac)
 	return (ac->want_keys & xi->have_keys) == ac->want_keys;
 }
 
+static int ceph_x_encrypt_buflen(int ilen)
+{
+	return sizeof(struct ceph_x_encrypt_header) + ilen + 16 +
+		sizeof(u32);
+}
+
 static int ceph_x_encrypt(struct ceph_crypto_key *secret,
 			  void *ibuf, int ilen, void *obuf, size_t olen)
 {
@@ -150,6 +156,11 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
 	struct timespec validity;
 	struct ceph_crypto_key old_key;
 	void *tp, *tpend;
+	struct ceph_timespec new_validity;
+	struct ceph_crypto_key new_session_key;
+	struct ceph_buffer *new_ticket_blob;
+	unsigned long new_expires, new_renew_after;
+	u64 new_secret_id;
 
 	ceph_decode_need(&p, end, sizeof(u32) + 1, bad);
 
@@ -182,16 +193,16 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
 		goto bad;
 
 	memcpy(&old_key, &th->session_key, sizeof(old_key));
-	ret = ceph_crypto_key_decode(&th->session_key, &dp, dend);
+	ret = ceph_crypto_key_decode(&new_session_key, &dp, dend);
 	if (ret)
 		goto out;
 
-	ceph_decode_copy(&dp, &th->validity, sizeof(th->validity));
-	ceph_decode_timespec(&validity, &th->validity);
-	th->expires = get_seconds() + validity.tv_sec;
-	th->renew_after = th->expires - (validity.tv_sec / 4);
-	dout(" expires=%lu renew_after=%lu\n", th->expires,
-	     th->renew_after);
+	ceph_decode_copy(&dp, &new_validity, sizeof(new_validity));
+	ceph_decode_timespec(&validity, &new_validity);
+	new_expires = get_seconds() + validity.tv_sec;
+	new_renew_after = new_expires - (validity.tv_sec / 4);
+	dout(" expires=%lu renew_after=%lu\n", new_expires,
+	     new_renew_after);
 
 	/* ticket blob for service */
 	ceph_decode_8_safe(&p, end, is_enc, bad);
@@ -216,10 +227,21 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
 	dout(" ticket blob is %d bytes\n", dlen);
 	ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad);
 	struct_v = ceph_decode_8(&tp);
-	th->secret_id = ceph_decode_64(&tp);
-	ret = ceph_decode_buffer(&th->ticket_blob, &tp, tpend);
+	new_secret_id = ceph_decode_64(&tp);
+	ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend);
 	if (ret)
 		goto out;
+
+	/* all is well, update our ticket */
+	ceph_crypto_key_destroy(&th->session_key);
+	if (th->ticket_blob)
+		ceph_buffer_put(th->ticket_blob);
+	th->session_key = new_session_key;
+	th->ticket_blob = new_ticket_blob;
+	th->validity = new_validity;
+	th->secret_id = new_secret_id;
+	th->expires = new_expires;
+	th->renew_after = new_renew_after;
 	dout(" got ticket service %d (%s) secret_id %lld len %d\n",
 	     type, ceph_entity_type_name(type), th->secret_id,
 	     (int)th->ticket_blob->vec.iov_len);
@@ -242,7 +264,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
 				   struct ceph_x_ticket_handler *th,
 				   struct ceph_x_authorizer *au)
 {
-	int len;
+	int maxlen;
 	struct ceph_x_authorize_a *msg_a;
 	struct ceph_x_authorize_b msg_b;
 	void *p, *end;
@@ -253,15 +275,15 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
 	dout("build_authorizer for %s %p\n",
 	     ceph_entity_type_name(th->service), au);
 
-	len = sizeof(*msg_a) + sizeof(msg_b) + sizeof(u32) +
-		ticket_blob_len + 16;
-	dout(" need len %d\n", len);
-	if (au->buf && au->buf->alloc_len < len) {
+	maxlen = sizeof(*msg_a) + sizeof(msg_b) +
+		ceph_x_encrypt_buflen(ticket_blob_len);
+	dout(" need len %d\n", maxlen);
+	if (au->buf && au->buf->alloc_len < maxlen) {
 		ceph_buffer_put(au->buf);
 		au->buf = NULL;
 	}
 	if (!au->buf) {
-		au->buf = ceph_buffer_new(len, GFP_NOFS);
+		au->buf = ceph_buffer_new(maxlen, GFP_NOFS);
 		if (!au->buf)
 			return -ENOMEM;
 	}
@@ -296,6 +318,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
 	au->buf->vec.iov_len = p - au->buf->vec.iov_base;
 	dout(" built authorizer nonce %llx len %d\n", au->nonce,
 	     (int)au->buf->vec.iov_len);
+	BUG_ON(au->buf->vec.iov_len > maxlen);
 	return 0;
 
 out_buf:
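
The sizing logic above is worth spelling out: ceph_x_encrypt() emits an encryption header, the payload, up to one 16-byte cipher block of padding, and a u32 length prefix, and ceph_x_encrypt_buflen() is the worst case of that sum. Sizing the authorizer buffer with it, and asserting the bound with the new BUG_ON(), replaces the old hand-rolled "+ sizeof(u32) ... + 16" arithmetic. A toy version of the bound (the 8-byte header here is an assumption for illustration, not the real struct ceph_x_encrypt_header):

    #include <stdio.h>

    struct encrypt_header {         /* stand-in; not the kernel struct */
        unsigned char magic[8];
    };

    static int encrypt_buflen(int ilen)
    {
        /* header + payload + worst-case block padding + u32 length prefix */
        return (int)sizeof(struct encrypt_header) + ilen + 16 +
               (int)sizeof(unsigned int);
    }

    int main(void)
    {
        printf("a 100-byte ticket blob needs at most %d bytes\n",
               encrypt_buflen(100));
        return 0;
    }
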
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index db122bb357b..7d0a0d0adc1 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1407,6 +1407,7 @@ static int try_nonblocking_invalidate(struct inode *inode)
  */
 void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 		     struct ceph_mds_session *session)
+	__releases(session->s_mutex)
 {
 	struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode);
 	struct ceph_mds_client *mdsc = &client->mdsc;
@@ -1414,7 +1415,6 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 	struct ceph_cap *cap;
 	int file_wanted, used;
 	int took_snap_rwsem = 0;  /* true if mdsc->snap_rwsem held */
-	int drop_session_lock = session ? 0 : 1;
 	int issued, implemented, want, retain, revoking, flushing = 0;
 	int mds = -1;  /* keep track of how far we've gone through i_caps list
 			  to avoid an infinite loop on retry */
@@ -1639,7 +1639,7 @@ ack:
 	if (queue_invalidate)
 		ceph_queue_invalidate(inode);
 
-	if (session && drop_session_lock)
+	if (session)
 		mutex_unlock(&session->s_mutex);
 	if (took_snap_rwsem)
 		up_read(&mdsc->snap_rwsem);
@@ -2195,18 +2195,19 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
  * Handle a cap GRANT message from the MDS.  (Note that a GRANT may
  * actually be a revocation if it specifies a smaller cap set.)
  *
- * caller holds s_mutex.
+ * caller holds s_mutex and i_lock, we drop both.
+ *
  * return value:
  *  0 - ok
  *  1 - check_caps on auth cap only (writeback)
  *  2 - check_caps (ack revoke)
  */
-static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
+static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 			    struct ceph_mds_session *session,
 			    struct ceph_cap *cap,
 			    struct ceph_buffer *xattr_buf)
 	__releases(inode->i_lock)
-
+	__releases(session->s_mutex)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int mds = session->s_mds;
@@ -2216,7 +2217,7 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 	u64 size = le64_to_cpu(grant->size);
 	u64 max_size = le64_to_cpu(grant->max_size);
 	struct timespec mtime, atime, ctime;
-	int reply = 0;
+	int check_caps = 0;
 	int wake = 0;
 	int writeback = 0;
 	int revoked_rdcache = 0;
@@ -2329,11 +2330,12 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 		if ((used & ~newcaps) & CEPH_CAP_FILE_BUFFER)
 			writeback = 1; /* will delay ack */
 		else if (dirty & ~newcaps)
-			reply = 1;     /* initiate writeback in check_caps */
+			check_caps = 1; /* initiate writeback in check_caps */
 		else if (((used & ~newcaps) & CEPH_CAP_FILE_CACHE) == 0 ||
 			 revoked_rdcache)
-			reply = 2;     /* send revoke ack in check_caps */
+			check_caps = 2; /* send revoke ack in check_caps */
 		cap->issued = newcaps;
+		cap->implemented |= newcaps;
 	} else if (cap->issued == newcaps) {
 		dout("caps unchanged: %s -> %s\n",
 		     ceph_cap_string(cap->issued), ceph_cap_string(newcaps));
@@ -2346,6 +2348,7 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 		 * pending revocation */
 		wake = 1;
 	}
+	BUG_ON(cap->issued & ~cap->implemented);
 
 	spin_unlock(&inode->i_lock);
 	if (writeback)
@@ -2359,7 +2362,14 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 		ceph_queue_invalidate(inode);
 	if (wake)
 		wake_up(&ci->i_cap_wq);
-	return reply;
+
+	if (check_caps == 1)
+		ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
+				session);
+	else if (check_caps == 2)
+		ceph_check_caps(ci, CHECK_CAPS_NODELAY, session);
+	else
+		mutex_unlock(&session->s_mutex);
 }
 
 /*
@@ -2548,9 +2558,8 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 			ci->i_cap_exporting_issued = cap->issued;
 		}
 		__ceph_remove_cap(cap);
-	} else {
-		WARN_ON(!cap);
 	}
+	/* else, we already released it */
 
 	spin_unlock(&inode->i_lock);
 }
@@ -2621,9 +2630,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 	u64 cap_id;
 	u64 size, max_size;
 	u64 tid;
-	int check_caps = 0;
 	void *snaptrace;
-	int r;
 
 	dout("handle_caps from mds%d\n", mds);
 
@@ -2668,8 +2675,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 	case CEPH_CAP_OP_IMPORT:
 		handle_cap_import(mdsc, inode, h, session,
 				  snaptrace, le32_to_cpu(h->snap_trace_len));
-		check_caps = 1; /* we may have sent a RELEASE to the old auth */
-		goto done;
+		ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY,
+				session);
+		goto done_unlocked;
 	}
 
 	/* the rest require a cap */
@@ -2686,16 +2694,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 	switch (op) {
 	case CEPH_CAP_OP_REVOKE:
 	case CEPH_CAP_OP_GRANT:
-		r = handle_cap_grant(inode, h, session, cap, msg->middle);
-		if (r == 1)
-			ceph_check_caps(ceph_inode(inode),
-					CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
-					session);
-		else if (r == 2)
-			ceph_check_caps(ceph_inode(inode),
-					CHECK_CAPS_NODELAY,
-					session);
-		break;
+		handle_cap_grant(inode, h, session, cap, msg->middle);
+		goto done_unlocked;
 
 	case CEPH_CAP_OP_FLUSH_ACK:
 		handle_cap_flush_ack(inode, tid, h, session, cap);
@@ -2713,9 +2713,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 
 done:
 	mutex_unlock(&session->s_mutex);
-
-	if (check_caps)
-		ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, NULL);
+done_unlocked:
 	if (inode)
 		iput(inode);
 	return;
@@ -2838,11 +2836,18 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
 	struct ceph_cap *cap;
 	struct ceph_mds_request_release *rel = *p;
 	int ret = 0;
-
-	dout("encode_inode_release %p mds%d drop %s unless %s\n", inode,
-	     mds, ceph_cap_string(drop), ceph_cap_string(unless));
+	int used = 0;
 
 	spin_lock(&inode->i_lock);
+	used = __ceph_caps_used(ci);
+
+	dout("encode_inode_release %p mds%d used %s drop %s unless %s\n", inode,
+	     mds, ceph_cap_string(used), ceph_cap_string(drop),
+	     ceph_cap_string(unless));
+
+	/* only drop unused caps */
+	drop &= ~used;
+
 	cap = __get_cap_for_mds(ci, mds);
 	if (cap && __cap_is_valid(cap)) {
 		if (force ||
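
The common thread in the caps.c changes is lock handoff: handle_cap_grant() now returns void and must drop session->s_mutex on every path, either by passing the session to ceph_check_caps(), which likewise releases it, or by unlocking directly; the added __releases() annotations advertise exactly that to sparse. A runnable toy of the ownership transfer (hypothetical names; a pthread mutex stands in for s_mutex):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t s_mutex = PTHREAD_MUTEX_INITIALIZER;

    static void check_caps(void)          /* __releases(s_mutex), in kernel terms */
    {
        printf("acking the grant under the session mutex\n");
        pthread_mutex_unlock(&s_mutex);
    }

    static void handle_cap_grant(int need_check)  /* __releases(s_mutex) */
    {
        if (need_check)
            check_caps();                 /* mutex ownership handed on */
        else
            pthread_mutex_unlock(&s_mutex);
    }

    int main(void)
    {
        pthread_mutex_lock(&s_mutex);     /* caller takes s_mutex... */
        handle_cap_grant(1);              /* ...callee guarantees release */
        return 0;
    }
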
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 5107384ee02..8a9116e15b7 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -288,8 +288,10 @@ more:
 		CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;
 
 	/* discard old result, if any */
-	if (fi->last_readdir)
+	if (fi->last_readdir) {
 		ceph_mdsc_put_request(fi->last_readdir);
+		fi->last_readdir = NULL;
+	}
 
 	/* requery frag tree, as the frag topology may have changed */
 	frag = ceph_choose_frag(ceph_inode(inode), frag, NULL, NULL);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 7abe1aed819..aca82d55cc5 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -378,6 +378,22 @@ void ceph_destroy_inode(struct inode *inode)
 
 	ceph_queue_caps_release(inode);
 
+	/*
+	 * we may still have a snap_realm reference if there are stray
+	 * caps in i_cap_exporting_issued or i_snap_caps.
+	 */
+	if (ci->i_snap_realm) {
+		struct ceph_mds_client *mdsc =
+			&ceph_client(ci->vfs_inode.i_sb)->mdsc;
+		struct ceph_snap_realm *realm = ci->i_snap_realm;
+
+		dout(" dropping residual ref to snap realm %p\n", realm);
+		spin_lock(&realm->inodes_with_caps_lock);
+		list_del_init(&ci->i_snap_realm_item);
+		spin_unlock(&realm->inodes_with_caps_lock);
+		ceph_put_snap_realm(mdsc, realm);
+	}
+
 	kfree(ci->i_symlink);
 	while ((n = rb_first(&ci->i_fragtree)) != NULL) {
 		frag = rb_entry(n, struct ceph_inode_frag, node);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a2600101ec2..5c7920be642 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -328,6 +328,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 	struct ceph_mds_session *s;
 
 	s = kzalloc(sizeof(*s), GFP_NOFS);
+	if (!s)
+		return ERR_PTR(-ENOMEM);
 	s->s_mdsc = mdsc;
 	s->s_mds = mds;
 	s->s_state = CEPH_MDS_SESSION_NEW;
@@ -529,7 +531,7 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
 {
 	dout("__unregister_request %p tid %lld\n", req, req->r_tid);
 	rb_erase(&req->r_node, &mdsc->request_tree);
-	ceph_mdsc_put_request(req);
+	RB_CLEAR_NODE(&req->r_node);
 
 	if (req->r_unsafe_dir) {
 		struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir);
@@ -538,6 +540,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
 		list_del_init(&req->r_unsafe_dir_item);
 		spin_unlock(&ci->i_unsafe_lock);
 	}
+
+	ceph_mdsc_put_request(req);
 }
 
 /*
@@ -862,6 +866,7 @@ static int send_renew_caps(struct ceph_mds_client *mdsc,
 	if (time_after_eq(jiffies, session->s_cap_ttl) &&
 	    time_after_eq(session->s_cap_ttl, session->s_renew_requested))
 		pr_info("mds%d caps stale\n", session->s_mds);
+	session->s_renew_requested = jiffies;
 
 	/* do not try to renew caps until a recovering mds has reconnected
 	 * with its clients. */
@@ -874,7 +879,6 @@ static int send_renew_caps(struct ceph_mds_client *mdsc,
 
 	dout("send_renew_caps to mds%d (%s)\n", session->s_mds,
 	     ceph_mds_state_name(state));
-	session->s_renew_requested = jiffies;
 	msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS,
 				 ++session->s_renew_seq);
 	if (IS_ERR(msg))
@@ -1566,8 +1570,13 @@ static int __do_request(struct ceph_mds_client *mdsc,
 
 	/* get, open session */
 	session = __ceph_lookup_mds_session(mdsc, mds);
-	if (!session)
+	if (!session) {
 		session = register_session(mdsc, mds);
+		if (IS_ERR(session)) {
+			err = PTR_ERR(session);
+			goto finish;
+		}
+	}
 	dout("do_request mds%d session %p state %s\n", mds, session,
 	     session_state_name(session->s_state));
 	if (session->s_state != CEPH_MDS_SESSION_OPEN &&
@@ -1770,7 +1779,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 	dout("handle_reply %p\n", req);
 
 	/* correct session? */
-	if (!req->r_session && req->r_session != session) {
+	if (req->r_session != session) {
 		pr_err("mdsc_handle_reply got %llu on session mds%d"
 		       " not mds%d\n", tid, session->s_mds,
 		       req->r_session ? req->r_session->s_mds : -1);
@@ -2682,29 +2691,41 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
  */
 static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid)
 {
-	struct ceph_mds_request *req = NULL;
+	struct ceph_mds_request *req = NULL, *nextreq;
 	struct rb_node *n;
 
 	mutex_lock(&mdsc->mutex);
 	dout("wait_unsafe_requests want %lld\n", want_tid);
+restart:
 	req = __get_oldest_req(mdsc);
 	while (req && req->r_tid <= want_tid) {
+		/* find next request */
+		n = rb_next(&req->r_node);
+		if (n)
+			nextreq = rb_entry(n, struct ceph_mds_request, r_node);
+		else
+			nextreq = NULL;
 		if ((req->r_op & CEPH_MDS_OP_WRITE)) {
 			/* write op */
 			ceph_mdsc_get_request(req);
+			if (nextreq)
+				ceph_mdsc_get_request(nextreq);
 			mutex_unlock(&mdsc->mutex);
 			dout("wait_unsafe_requests wait on %llu (want %llu)\n",
 			     req->r_tid, want_tid);
 			wait_for_completion(&req->r_safe_completion);
 			mutex_lock(&mdsc->mutex);
-			n = rb_next(&req->r_node);
 			ceph_mdsc_put_request(req);
-		} else {
-			n = rb_next(&req->r_node);
+			if (!nextreq)
+				break;  /* next dne before, so we're done! */
+			if (RB_EMPTY_NODE(&nextreq->r_node)) {
+				/* next request was removed from tree */
+				ceph_mdsc_put_request(nextreq);
+				goto restart;
+			}
+			ceph_mdsc_put_request(nextreq);  /* won't go away */
 		}
-		if (!n)
-			break;
-		req = rb_entry(n, struct ceph_mds_request, r_node);
+		req = nextreq;
 	}
 	mutex_unlock(&mdsc->mutex);
 	dout("wait_unsafe_requests done\n");
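
The wait_unsafe_requests() rewrite pairs with the RB_CLEAR_NODE() added to __unregister_request(): because mdsc->mutex is dropped while waiting, the next request must be pinned with a reference first, and RB_EMPTY_NODE() afterwards reveals whether it was erased from the tree in the meantime, in which case the scan restarts. A self-contained toy of the pin-then-revalidate walk (hypothetical names; a singly linked list and a removed flag stand in for the rbtree and RB_EMPTY_NODE()):

    #include <stdio.h>
    #include <stdlib.h>

    struct req {
        int tid;
        int refs;
        int removed;                  /* stands in for RB_EMPTY_NODE() */
        struct req *next;
    };

    static struct req *head;
    static int raced;

    static struct req *req_get(struct req *r)
    {
        if (r)
            r->refs++;
        return r;
    }

    static void req_put(struct req *r)
    {
        if (r && --r->refs == 0)
            free(r);
    }

    static void unregister(struct req *r)     /* like __unregister_request() */
    {
        struct req **p = &head;

        while (*p && *p != r)
            p = &(*p)->next;
        if (*p)
            *p = r->next;
        r->removed = 1;                       /* like RB_CLEAR_NODE() */
        req_put(r);
    }

    int main(void)
    {
        struct req *a = calloc(1, sizeof(*a));
        struct req *b = calloc(1, sizeof(*b));
        struct req *req, *nextreq;

        a->tid = 1; a->refs = 1; a->next = b;
        b->tid = 2; b->refs = 1;
        head = a;

    restart:
        for (req = head; req; req = nextreq) {
            nextreq = req_get(req->next);     /* pin before "sleeping" */
            printf("waiting on tid %d\n", req->tid);
            if (req->tid == 1 && !raced) {
                raced = 1;
                unregister(b);                /* notionally, another thread */
            }
            if (nextreq && nextreq->removed) {
                req_put(nextreq);             /* next vanished: rescan */
                goto restart;
            }
            req_put(nextreq);                 /* still in the list, safe */
        }
        free(a);
        return 0;
    }
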
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 781656a49bf..a32f0f896d9 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -366,6 +366,14 @@ void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr)
 }
 
 /*
+ * return true if this connection ever successfully opened
+ */
+bool ceph_con_opened(struct ceph_connection *con)
+{
+	return con->connect_seq > 0;
+}
+
+/*
  * generic get/put
  */
 struct ceph_connection *ceph_con_get(struct ceph_connection *con)
@@ -830,13 +838,6 @@ static void prepare_read_connect(struct ceph_connection *con)
 	con->in_base_pos = 0;
 }
 
-static void prepare_read_connect_retry(struct ceph_connection *con)
-{
-	dout("prepare_read_connect_retry %p\n", con);
-	con->in_base_pos = strlen(CEPH_BANNER) + sizeof(con->actual_peer_addr)
-		+ sizeof(con->peer_addr_for_me);
-}
-
 static void prepare_read_ack(struct ceph_connection *con)
 {
 	dout("prepare_read_ack %p\n", con);
@@ -1146,7 +1147,7 @@ static int process_connect(struct ceph_connection *con)
 		}
 		con->auth_retry = 1;
 		prepare_write_connect(con->msgr, con, 0);
-		prepare_read_connect_retry(con);
+		prepare_read_connect(con);
 		break;
 
 	case CEPH_MSGR_TAG_RESETSESSION:
@@ -1843,8 +1844,6 @@ static void ceph_fault(struct ceph_connection *con)
 		goto out;
 	}
 
-	clear_bit(BUSY, &con->state);  /* to avoid an improbable race */
-
 	mutex_lock(&con->mutex);
 	if (test_bit(CLOSED, &con->state))
 		goto out_unlock;
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h
index 4caaa591111..a343dae73cd 100644
--- a/fs/ceph/messenger.h
+++ b/fs/ceph/messenger.h
@@ -223,6 +223,7 @@ extern void ceph_con_init(struct ceph_messenger *msgr,
 			  struct ceph_connection *con);
 extern void ceph_con_open(struct ceph_connection *con,
 			  struct ceph_entity_addr *addr);
+extern bool ceph_con_opened(struct ceph_connection *con);
 extern void ceph_con_close(struct ceph_connection *con);
 extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg);
 extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg);
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index dbe63db9762..c7b4dedaace 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -413,11 +413,22 @@ static void remove_old_osds(struct ceph_osd_client *osdc, int remove_all)
  */
 static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
 {
+	struct ceph_osd_request *req;
 	int ret = 0;
 
 	dout("__reset_osd %p osd%d\n", osd, osd->o_osd);
 	if (list_empty(&osd->o_requests)) {
 		__remove_osd(osdc, osd);
+	} else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd],
+			  &osd->o_con.peer_addr,
+			  sizeof(osd->o_con.peer_addr)) == 0 &&
+		   !ceph_con_opened(&osd->o_con)) {
+		dout(" osd addr hasn't changed and connection never opened,"
+		     " letting msgr retry");
+		/* touch each r_stamp for handle_timeout()'s benefit */
+		list_for_each_entry(req, &osd->o_requests, r_osd_item)
+			req->r_stamp = jiffies;
+		ret = -EAGAIN;
 	} else {
 		ceph_con_close(&osd->o_con);
 		ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]);
@@ -633,7 +644,7 @@ static int __send_request(struct ceph_osd_client *osdc,
 	reqhead->flags |= cpu_to_le32(req->r_flags);  /* e.g., RETRY */
 	reqhead->reassert_version = req->r_reassert_version;
 
-	req->r_sent_stamp = jiffies;
+	req->r_stamp = jiffies;
 	list_move_tail(&osdc->req_lru, &req->r_req_lru_item);
 
 	ceph_msg_get(req->r_request); /* send consumes a ref */
@@ -660,7 +671,7 @@ static void handle_timeout(struct work_struct *work)
 	unsigned long timeout = osdc->client->mount_args->osd_timeout * HZ;
 	unsigned long keepalive =
 		osdc->client->mount_args->osd_keepalive_timeout * HZ;
-	unsigned long last_sent = 0;
+	unsigned long last_stamp = 0;
 	struct rb_node *p;
 	struct list_head slow_osds;
 
@@ -697,12 +708,12 @@ static void handle_timeout(struct work_struct *work)
 		req = list_entry(osdc->req_lru.next, struct ceph_osd_request,
 				 r_req_lru_item);
 
-		if (time_before(jiffies, req->r_sent_stamp + timeout))
+		if (time_before(jiffies, req->r_stamp + timeout))
 			break;
 
-		BUG_ON(req == last_req && req->r_sent_stamp == last_sent);
+		BUG_ON(req == last_req && req->r_stamp == last_stamp);
 		last_req = req;
-		last_sent = req->r_sent_stamp;
+		last_stamp = req->r_stamp;
 
 		osd = req->r_osd;
 		BUG_ON(!osd);
@@ -718,7 +729,7 @@ static void handle_timeout(struct work_struct *work)
 	 */
 	INIT_LIST_HEAD(&slow_osds);
 	list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) {
-		if (time_before(jiffies, req->r_sent_stamp + keepalive))
+		if (time_before(jiffies, req->r_stamp + keepalive))
 			break;
 
 		osd = req->r_osd;
@@ -862,7 +873,9 @@ static int __kick_requests(struct ceph_osd_client *osdc,
 
 	dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1);
 	if (kickosd) {
-		__reset_osd(osdc, kickosd);
+		err = __reset_osd(osdc, kickosd);
+		if (err == -EAGAIN)
+			return 1;
 	} else {
 		for (p = rb_first(&osdc->osds); p; p = n) {
 			struct ceph_osd *osd =
@@ -913,7 +926,7 @@ static int __kick_requests(struct ceph_osd_client *osdc,
 
 kick:
 		dout("kicking %p tid %llu osd%d\n", req, req->r_tid,
-		     req->r_osd->o_osd);
+		     req->r_osd ? req->r_osd->o_osd : -1);
 		req->r_flags |= CEPH_OSD_FLAG_RETRY;
 		err = __send_request(osdc, req);
 		if (err) {
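
__reset_osd() now separates two cases: if the OSD's address in the new map is unchanged and the connection never actually opened, closing and reopening it would only race with the messenger's own retry, so the function just refreshes each pending request's r_stamp (so handle_timeout() measures from now rather than from the original send) and returns -EAGAIN, which makes __kick_requests() leave the queue untouched. A toy model of that decision (hypothetical names; a plain counter stands in for jiffies):

    #include <stdio.h>

    static unsigned long jiffies;     /* stand-in for the kernel tick counter */

    struct req {
        int tid;
        unsigned long r_stamp;
    };

    /* Returns nonzero when the caller should leave requests queued. */
    static int reset_or_defer(struct req *reqs, int n,
                              int addr_changed, int ever_opened)
    {
        int i;

        if (!addr_changed && !ever_opened) {
            for (i = 0; i < n; i++)
                reqs[i].r_stamp = jiffies;    /* push the timeout out */
            return 1;                         /* like returning -EAGAIN */
        }
        return 0;                             /* really close and reopen */
    }

    int main(void)
    {
        struct req reqs[2] = { { 1, 0 }, { 2, 5 } };

        jiffies = 100;
        printf("deferred: %d\n", reset_or_defer(reqs, 2, 0, 0));
        printf("tid 1 stamp refreshed to %lu\n", reqs[0].r_stamp);
        return 0;
    }
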
diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h
index 1b1a3ca43af..b0759911e7c 100644
--- a/fs/ceph/osd_client.h
+++ b/fs/ceph/osd_client.h
@@ -70,7 +70,7 @@ struct ceph_osd_request {
 
 	char              r_oid[40];          /* object name */
 	int               r_oid_len;
-	unsigned long     r_sent_stamp;
+	unsigned long     r_stamp;            /* send OR check time */
 	bool              r_resend;           /* msg send failed, needs retry */
 
 	struct ceph_file_layout r_file_layout;
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index b83f2692b83..d82fe87c2a6 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -480,6 +480,14 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id)
 	return NULL;
 }
 
+void __decode_pool(void **p, struct ceph_pg_pool_info *pi)
+{
+	ceph_decode_copy(p, &pi->v, sizeof(pi->v));
+	calc_pg_masks(pi);
+	*p += le32_to_cpu(pi->v.num_snaps) * sizeof(u64);
+	*p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2;
+}
+
 /*
  * decode a full map.
  */
@@ -526,12 +534,8 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
 				   ev, CEPH_PG_POOL_VERSION);
 			goto bad;
 		}
-		ceph_decode_copy(p, &pi->v, sizeof(pi->v));
+		__decode_pool(p, pi);
 		__insert_pg_pool(&map->pg_pools, pi);
-		calc_pg_masks(pi);
-		*p += le32_to_cpu(pi->v.num_snaps) * sizeof(u64);
-		*p += le32_to_cpu(pi->v.num_removed_snap_intervals)
-			* sizeof(u64) * 2;
 	}
 	ceph_decode_32_safe(p, end, map->pool_max, bad);
 
@@ -714,8 +718,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 			pi->id = pool;
 			__insert_pg_pool(&map->pg_pools, pi);
 		}
-		ceph_decode_copy(p, &pi->v, sizeof(pi->v));
-		calc_pg_masks(pi);
+		__decode_pool(p, pi);
 	}
 
 	/* old_pool */
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index bf2a5f3846a..df04e210a05 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -314,9 +314,9 @@ static int build_snap_context(struct ceph_snap_realm *realm)
 	   because we rebuild_snap_realms() works _downward_ in
 	   hierarchy after each update.) */
 	if (realm->cached_context &&
-	    realm->cached_context->seq <= realm->seq &&
+	    realm->cached_context->seq == realm->seq &&
 	    (!parent ||
-	     realm->cached_context->seq <= parent->cached_context->seq)) {
+	     realm->cached_context->seq >= parent->cached_context->seq)) {
 		dout("build_snap_context %llx %p: %p seq %lld (%d snaps)"
 		     " (unchanged)\n",
 		     realm->ino, realm, realm->cached_context,
@@ -818,7 +818,9 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
 		 * queued (again) by ceph_update_snap_trace()
 		 * below.  Queue it _now_, under the old context.
 		 */
+		spin_lock(&realm->inodes_with_caps_lock);
 		list_del_init(&ci->i_snap_realm_item);
+		spin_unlock(&realm->inodes_with_caps_lock);
 		spin_unlock(&inode->i_lock);
 
 		ceph_queue_cap_snap(ci,
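
Finally, the build_snap_context() fix tightens when a cached snap context may be reused: its seq must exactly equal the realm's and be at least the parent's, whereas the old "<=" tests could accept a context that was stale on either side. A small check mirroring the corrected condition (hypothetical names):

    #include <stdbool.h>
    #include <stdio.h>

    static bool cached_context_ok(long cached_seq, long realm_seq,
                                  bool has_parent, long parent_seq)
    {
        return cached_seq == realm_seq &&
               (!has_parent || cached_seq >= parent_seq);
    }

    int main(void)
    {
        /* the old "<=" test would wrongly reuse this stale context: */
        printf("cached=4 realm=5 parent=4 -> reuse? %d\n",
               cached_context_ok(4, 5, true, 4));   /* 0: rebuild */
        printf("cached=5 realm=5 parent=4 -> reuse? %d\n",
               cached_context_ok(5, 5, true, 4));   /* 1: reuse */
        return 0;
    }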