aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/addr.c11
-rw-r--r--fs/ceph/auth.c1
-rw-r--r--fs/ceph/auth_none.c1
-rw-r--r--fs/ceph/auth_x.c54
-rw-r--r--fs/ceph/buffer.c3
-rw-r--r--fs/ceph/caps.c74
-rw-r--r--fs/ceph/crypto.c1
-rw-r--r--fs/ceph/debugfs.c1
-rw-r--r--fs/ceph/dir.c5
-rw-r--r--fs/ceph/export.c1
-rw-r--r--fs/ceph/file.c1
-rw-r--r--fs/ceph/inode.c16
-rw-r--r--fs/ceph/mds_client.c44
-rw-r--r--fs/ceph/messenger.c20
-rw-r--r--fs/ceph/messenger.h1
-rw-r--r--fs/ceph/mon_client.c1
-rw-r--r--fs/ceph/osd_client.c29
-rw-r--r--fs/ceph/osd_client.h2
-rw-r--r--fs/ceph/osdmap.c21
-rw-r--r--fs/ceph/pagelist.c1
-rw-r--r--fs/ceph/snap.c7
-rw-r--r--fs/ceph/super.c1
-rw-r--r--fs/ceph/super.h1
-rw-r--r--fs/ceph/xattr.c1
24 files changed, 206 insertions, 92 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 23bb0ceabe31..aa3cd7cc3e40 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -5,6 +5,7 @@
5#include <linux/mm.h> 5#include <linux/mm.h>
6#include <linux/pagemap.h> 6#include <linux/pagemap.h>
7#include <linux/writeback.h> /* generic_writepages */ 7#include <linux/writeback.h> /* generic_writepages */
8#include <linux/slab.h>
8#include <linux/pagevec.h> 9#include <linux/pagevec.h>
9#include <linux/task_io_accounting_ops.h> 10#include <linux/task_io_accounting_ops.h>
10 11
@@ -919,6 +920,10 @@ static int context_is_writeable_or_written(struct inode *inode,
919/* 920/*
920 * We are only allowed to write into/dirty the page if the page is 921 * We are only allowed to write into/dirty the page if the page is
921 * clean, or already dirty within the same snap context. 922 * clean, or already dirty within the same snap context.
923 *
924 * called with page locked.
925 * return success with page locked,
926 * or any failure (incl -EAGAIN) with page unlocked.
922 */ 927 */
923static int ceph_update_writeable_page(struct file *file, 928static int ceph_update_writeable_page(struct file *file,
924 loff_t pos, unsigned len, 929 loff_t pos, unsigned len,
@@ -961,9 +966,11 @@ retry_locked:
961 snapc = ceph_get_snap_context((void *)page->private); 966 snapc = ceph_get_snap_context((void *)page->private);
962 unlock_page(page); 967 unlock_page(page);
963 ceph_queue_writeback(inode); 968 ceph_queue_writeback(inode);
964 wait_event_interruptible(ci->i_cap_wq, 969 r = wait_event_interruptible(ci->i_cap_wq,
965 context_is_writeable_or_written(inode, snapc)); 970 context_is_writeable_or_written(inode, snapc));
966 ceph_put_snap_context(snapc); 971 ceph_put_snap_context(snapc);
972 if (r == -ERESTARTSYS)
973 return r;
967 return -EAGAIN; 974 return -EAGAIN;
968 } 975 }
969 976
@@ -1035,7 +1042,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
1035 int r; 1042 int r;
1036 1043
1037 do { 1044 do {
1038 /* get a page*/ 1045 /* get a page */
1039 page = grab_cache_page_write_begin(mapping, index, 0); 1046 page = grab_cache_page_write_begin(mapping, index, 0);
1040 if (!page) 1047 if (!page)
1041 return -ENOMEM; 1048 return -ENOMEM;
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c
index abb204fea6c7..f6394b94b866 100644
--- a/fs/ceph/auth.c
+++ b/fs/ceph/auth.c
@@ -1,6 +1,7 @@
1#include "ceph_debug.h" 1#include "ceph_debug.h"
2 2
3#include <linux/module.h> 3#include <linux/module.h>
4#include <linux/slab.h>
4#include <linux/err.h> 5#include <linux/err.h>
5 6
6#include "types.h" 7#include "types.h"
diff --git a/fs/ceph/auth_none.c b/fs/ceph/auth_none.c
index b4ef6f0a6c85..8cd9e3af07f7 100644
--- a/fs/ceph/auth_none.c
+++ b/fs/ceph/auth_none.c
@@ -4,6 +4,7 @@
4#include <linux/err.h> 4#include <linux/err.h>
5#include <linux/module.h> 5#include <linux/module.h>
6#include <linux/random.h> 6#include <linux/random.h>
7#include <linux/slab.h>
7 8
8#include "auth_none.h" 9#include "auth_none.h"
9#include "auth.h" 10#include "auth.h"
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index f0318427b6da..d9001a4dc8cc 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -4,6 +4,7 @@
4#include <linux/err.h> 4#include <linux/err.h>
5#include <linux/module.h> 5#include <linux/module.h>
6#include <linux/random.h> 6#include <linux/random.h>
7#include <linux/slab.h>
7 8
8#include "auth_x.h" 9#include "auth_x.h"
9#include "auth_x_protocol.h" 10#include "auth_x_protocol.h"
@@ -28,6 +29,12 @@ static int ceph_x_is_authenticated(struct ceph_auth_client *ac)
28 return (ac->want_keys & xi->have_keys) == ac->want_keys; 29 return (ac->want_keys & xi->have_keys) == ac->want_keys;
29} 30}
30 31
32static int ceph_x_encrypt_buflen(int ilen)
33{
34 return sizeof(struct ceph_x_encrypt_header) + ilen + 16 +
35 sizeof(u32);
36}
37
31static int ceph_x_encrypt(struct ceph_crypto_key *secret, 38static int ceph_x_encrypt(struct ceph_crypto_key *secret,
32 void *ibuf, int ilen, void *obuf, size_t olen) 39 void *ibuf, int ilen, void *obuf, size_t olen)
33{ 40{
@@ -150,6 +157,11 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
150 struct timespec validity; 157 struct timespec validity;
151 struct ceph_crypto_key old_key; 158 struct ceph_crypto_key old_key;
152 void *tp, *tpend; 159 void *tp, *tpend;
160 struct ceph_timespec new_validity;
161 struct ceph_crypto_key new_session_key;
162 struct ceph_buffer *new_ticket_blob;
163 unsigned long new_expires, new_renew_after;
164 u64 new_secret_id;
153 165
154 ceph_decode_need(&p, end, sizeof(u32) + 1, bad); 166 ceph_decode_need(&p, end, sizeof(u32) + 1, bad);
155 167
@@ -182,16 +194,16 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
182 goto bad; 194 goto bad;
183 195
184 memcpy(&old_key, &th->session_key, sizeof(old_key)); 196 memcpy(&old_key, &th->session_key, sizeof(old_key));
185 ret = ceph_crypto_key_decode(&th->session_key, &dp, dend); 197 ret = ceph_crypto_key_decode(&new_session_key, &dp, dend);
186 if (ret) 198 if (ret)
187 goto out; 199 goto out;
188 200
189 ceph_decode_copy(&dp, &th->validity, sizeof(th->validity)); 201 ceph_decode_copy(&dp, &new_validity, sizeof(new_validity));
190 ceph_decode_timespec(&validity, &th->validity); 202 ceph_decode_timespec(&validity, &new_validity);
191 th->expires = get_seconds() + validity.tv_sec; 203 new_expires = get_seconds() + validity.tv_sec;
192 th->renew_after = th->expires - (validity.tv_sec / 4); 204 new_renew_after = new_expires - (validity.tv_sec / 4);
193 dout(" expires=%lu renew_after=%lu\n", th->expires, 205 dout(" expires=%lu renew_after=%lu\n", new_expires,
194 th->renew_after); 206 new_renew_after);
195 207
196 /* ticket blob for service */ 208 /* ticket blob for service */
197 ceph_decode_8_safe(&p, end, is_enc, bad); 209 ceph_decode_8_safe(&p, end, is_enc, bad);
@@ -216,10 +228,21 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
216 dout(" ticket blob is %d bytes\n", dlen); 228 dout(" ticket blob is %d bytes\n", dlen);
217 ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); 229 ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad);
218 struct_v = ceph_decode_8(&tp); 230 struct_v = ceph_decode_8(&tp);
219 th->secret_id = ceph_decode_64(&tp); 231 new_secret_id = ceph_decode_64(&tp);
220 ret = ceph_decode_buffer(&th->ticket_blob, &tp, tpend); 232 ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend);
221 if (ret) 233 if (ret)
222 goto out; 234 goto out;
235
236 /* all is well, update our ticket */
237 ceph_crypto_key_destroy(&th->session_key);
238 if (th->ticket_blob)
239 ceph_buffer_put(th->ticket_blob);
240 th->session_key = new_session_key;
241 th->ticket_blob = new_ticket_blob;
242 th->validity = new_validity;
243 th->secret_id = new_secret_id;
244 th->expires = new_expires;
245 th->renew_after = new_renew_after;
223 dout(" got ticket service %d (%s) secret_id %lld len %d\n", 246 dout(" got ticket service %d (%s) secret_id %lld len %d\n",
224 type, ceph_entity_type_name(type), th->secret_id, 247 type, ceph_entity_type_name(type), th->secret_id,
225 (int)th->ticket_blob->vec.iov_len); 248 (int)th->ticket_blob->vec.iov_len);
@@ -242,7 +265,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
242 struct ceph_x_ticket_handler *th, 265 struct ceph_x_ticket_handler *th,
243 struct ceph_x_authorizer *au) 266 struct ceph_x_authorizer *au)
244{ 267{
245 int len; 268 int maxlen;
246 struct ceph_x_authorize_a *msg_a; 269 struct ceph_x_authorize_a *msg_a;
247 struct ceph_x_authorize_b msg_b; 270 struct ceph_x_authorize_b msg_b;
248 void *p, *end; 271 void *p, *end;
@@ -253,15 +276,15 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
253 dout("build_authorizer for %s %p\n", 276 dout("build_authorizer for %s %p\n",
254 ceph_entity_type_name(th->service), au); 277 ceph_entity_type_name(th->service), au);
255 278
256 len = sizeof(*msg_a) + sizeof(msg_b) + sizeof(u32) + 279 maxlen = sizeof(*msg_a) + sizeof(msg_b) +
257 ticket_blob_len + 16; 280 ceph_x_encrypt_buflen(ticket_blob_len);
258 dout(" need len %d\n", len); 281 dout(" need len %d\n", maxlen);
259 if (au->buf && au->buf->alloc_len < len) { 282 if (au->buf && au->buf->alloc_len < maxlen) {
260 ceph_buffer_put(au->buf); 283 ceph_buffer_put(au->buf);
261 au->buf = NULL; 284 au->buf = NULL;
262 } 285 }
263 if (!au->buf) { 286 if (!au->buf) {
264 au->buf = ceph_buffer_new(len, GFP_NOFS); 287 au->buf = ceph_buffer_new(maxlen, GFP_NOFS);
265 if (!au->buf) 288 if (!au->buf)
266 return -ENOMEM; 289 return -ENOMEM;
267 } 290 }
@@ -296,6 +319,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
296 au->buf->vec.iov_len = p - au->buf->vec.iov_base; 319 au->buf->vec.iov_len = p - au->buf->vec.iov_base;
297 dout(" built authorizer nonce %llx len %d\n", au->nonce, 320 dout(" built authorizer nonce %llx len %d\n", au->nonce,
298 (int)au->buf->vec.iov_len); 321 (int)au->buf->vec.iov_len);
322 BUG_ON(au->buf->vec.iov_len > maxlen);
299 return 0; 323 return 0;
300 324
301out_buf: 325out_buf:
diff --git a/fs/ceph/buffer.c b/fs/ceph/buffer.c
index b98086c7aeba..c67535d70aa6 100644
--- a/fs/ceph/buffer.c
+++ b/fs/ceph/buffer.c
@@ -1,5 +1,8 @@
1 1
2#include "ceph_debug.h" 2#include "ceph_debug.h"
3
4#include <linux/slab.h>
5
3#include "buffer.h" 6#include "buffer.h"
4#include "decode.h" 7#include "decode.h"
5 8
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index db122bb357b8..3710e077a857 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3,6 +3,7 @@
3#include <linux/fs.h> 3#include <linux/fs.h>
4#include <linux/kernel.h> 4#include <linux/kernel.h>
5#include <linux/sched.h> 5#include <linux/sched.h>
6#include <linux/slab.h>
6#include <linux/vmalloc.h> 7#include <linux/vmalloc.h>
7#include <linux/wait.h> 8#include <linux/wait.h>
8#include <linux/writeback.h> 9#include <linux/writeback.h>
@@ -1407,6 +1408,7 @@ static int try_nonblocking_invalidate(struct inode *inode)
1407 */ 1408 */
1408void ceph_check_caps(struct ceph_inode_info *ci, int flags, 1409void ceph_check_caps(struct ceph_inode_info *ci, int flags,
1409 struct ceph_mds_session *session) 1410 struct ceph_mds_session *session)
1411 __releases(session->s_mutex)
1410{ 1412{
1411 struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); 1413 struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode);
1412 struct ceph_mds_client *mdsc = &client->mdsc; 1414 struct ceph_mds_client *mdsc = &client->mdsc;
@@ -1414,7 +1416,6 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
1414 struct ceph_cap *cap; 1416 struct ceph_cap *cap;
1415 int file_wanted, used; 1417 int file_wanted, used;
1416 int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */ 1418 int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */
1417 int drop_session_lock = session ? 0 : 1;
1418 int issued, implemented, want, retain, revoking, flushing = 0; 1419 int issued, implemented, want, retain, revoking, flushing = 0;
1419 int mds = -1; /* keep track of how far we've gone through i_caps list 1420 int mds = -1; /* keep track of how far we've gone through i_caps list
1420 to avoid an infinite loop on retry */ 1421 to avoid an infinite loop on retry */
@@ -1639,7 +1640,7 @@ ack:
1639 if (queue_invalidate) 1640 if (queue_invalidate)
1640 ceph_queue_invalidate(inode); 1641 ceph_queue_invalidate(inode);
1641 1642
1642 if (session && drop_session_lock) 1643 if (session)
1643 mutex_unlock(&session->s_mutex); 1644 mutex_unlock(&session->s_mutex);
1644 if (took_snap_rwsem) 1645 if (took_snap_rwsem)
1645 up_read(&mdsc->snap_rwsem); 1646 up_read(&mdsc->snap_rwsem);
@@ -2195,18 +2196,19 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2195 * Handle a cap GRANT message from the MDS. (Note that a GRANT may 2196 * Handle a cap GRANT message from the MDS. (Note that a GRANT may
2196 * actually be a revocation if it specifies a smaller cap set.) 2197 * actually be a revocation if it specifies a smaller cap set.)
2197 * 2198 *
2198 * caller holds s_mutex. 2199 * caller holds s_mutex and i_lock, we drop both.
2200 *
2199 * return value: 2201 * return value:
2200 * 0 - ok 2202 * 0 - ok
2201 * 1 - check_caps on auth cap only (writeback) 2203 * 1 - check_caps on auth cap only (writeback)
2202 * 2 - check_caps (ack revoke) 2204 * 2 - check_caps (ack revoke)
2203 */ 2205 */
2204static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, 2206static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2205 struct ceph_mds_session *session, 2207 struct ceph_mds_session *session,
2206 struct ceph_cap *cap, 2208 struct ceph_cap *cap,
2207 struct ceph_buffer *xattr_buf) 2209 struct ceph_buffer *xattr_buf)
2208 __releases(inode->i_lock) 2210 __releases(inode->i_lock)
2209 2211 __releases(session->s_mutex)
2210{ 2212{
2211 struct ceph_inode_info *ci = ceph_inode(inode); 2213 struct ceph_inode_info *ci = ceph_inode(inode);
2212 int mds = session->s_mds; 2214 int mds = session->s_mds;
@@ -2216,7 +2218,7 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2216 u64 size = le64_to_cpu(grant->size); 2218 u64 size = le64_to_cpu(grant->size);
2217 u64 max_size = le64_to_cpu(grant->max_size); 2219 u64 max_size = le64_to_cpu(grant->max_size);
2218 struct timespec mtime, atime, ctime; 2220 struct timespec mtime, atime, ctime;
2219 int reply = 0; 2221 int check_caps = 0;
2220 int wake = 0; 2222 int wake = 0;
2221 int writeback = 0; 2223 int writeback = 0;
2222 int revoked_rdcache = 0; 2224 int revoked_rdcache = 0;
@@ -2329,11 +2331,12 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2329 if ((used & ~newcaps) & CEPH_CAP_FILE_BUFFER) 2331 if ((used & ~newcaps) & CEPH_CAP_FILE_BUFFER)
2330 writeback = 1; /* will delay ack */ 2332 writeback = 1; /* will delay ack */
2331 else if (dirty & ~newcaps) 2333 else if (dirty & ~newcaps)
2332 reply = 1; /* initiate writeback in check_caps */ 2334 check_caps = 1; /* initiate writeback in check_caps */
2333 else if (((used & ~newcaps) & CEPH_CAP_FILE_CACHE) == 0 || 2335 else if (((used & ~newcaps) & CEPH_CAP_FILE_CACHE) == 0 ||
2334 revoked_rdcache) 2336 revoked_rdcache)
2335 reply = 2; /* send revoke ack in check_caps */ 2337 check_caps = 2; /* send revoke ack in check_caps */
2336 cap->issued = newcaps; 2338 cap->issued = newcaps;
2339 cap->implemented |= newcaps;
2337 } else if (cap->issued == newcaps) { 2340 } else if (cap->issued == newcaps) {
2338 dout("caps unchanged: %s -> %s\n", 2341 dout("caps unchanged: %s -> %s\n",
2339 ceph_cap_string(cap->issued), ceph_cap_string(newcaps)); 2342 ceph_cap_string(cap->issued), ceph_cap_string(newcaps));
@@ -2346,6 +2349,7 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2346 * pending revocation */ 2349 * pending revocation */
2347 wake = 1; 2350 wake = 1;
2348 } 2351 }
2352 BUG_ON(cap->issued & ~cap->implemented);
2349 2353
2350 spin_unlock(&inode->i_lock); 2354 spin_unlock(&inode->i_lock);
2351 if (writeback) 2355 if (writeback)
@@ -2359,7 +2363,14 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2359 ceph_queue_invalidate(inode); 2363 ceph_queue_invalidate(inode);
2360 if (wake) 2364 if (wake)
2361 wake_up(&ci->i_cap_wq); 2365 wake_up(&ci->i_cap_wq);
2362 return reply; 2366
2367 if (check_caps == 1)
2368 ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
2369 session);
2370 else if (check_caps == 2)
2371 ceph_check_caps(ci, CHECK_CAPS_NODELAY, session);
2372 else
2373 mutex_unlock(&session->s_mutex);
2363} 2374}
2364 2375
2365/* 2376/*
@@ -2548,9 +2559,8 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
2548 ci->i_cap_exporting_issued = cap->issued; 2559 ci->i_cap_exporting_issued = cap->issued;
2549 } 2560 }
2550 __ceph_remove_cap(cap); 2561 __ceph_remove_cap(cap);
2551 } else {
2552 WARN_ON(!cap);
2553 } 2562 }
2563 /* else, we already released it */
2554 2564
2555 spin_unlock(&inode->i_lock); 2565 spin_unlock(&inode->i_lock);
2556} 2566}
@@ -2621,9 +2631,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2621 u64 cap_id; 2631 u64 cap_id;
2622 u64 size, max_size; 2632 u64 size, max_size;
2623 u64 tid; 2633 u64 tid;
2624 int check_caps = 0;
2625 void *snaptrace; 2634 void *snaptrace;
2626 int r;
2627 2635
2628 dout("handle_caps from mds%d\n", mds); 2636 dout("handle_caps from mds%d\n", mds);
2629 2637
@@ -2668,8 +2676,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2668 case CEPH_CAP_OP_IMPORT: 2676 case CEPH_CAP_OP_IMPORT:
2669 handle_cap_import(mdsc, inode, h, session, 2677 handle_cap_import(mdsc, inode, h, session,
2670 snaptrace, le32_to_cpu(h->snap_trace_len)); 2678 snaptrace, le32_to_cpu(h->snap_trace_len));
2671 check_caps = 1; /* we may have sent a RELEASE to the old auth */ 2679 ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY,
2672 goto done; 2680 session);
2681 goto done_unlocked;
2673 } 2682 }
2674 2683
2675 /* the rest require a cap */ 2684 /* the rest require a cap */
@@ -2686,16 +2695,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2686 switch (op) { 2695 switch (op) {
2687 case CEPH_CAP_OP_REVOKE: 2696 case CEPH_CAP_OP_REVOKE:
2688 case CEPH_CAP_OP_GRANT: 2697 case CEPH_CAP_OP_GRANT:
2689 r = handle_cap_grant(inode, h, session, cap, msg->middle); 2698 handle_cap_grant(inode, h, session, cap, msg->middle);
2690 if (r == 1) 2699 goto done_unlocked;
2691 ceph_check_caps(ceph_inode(inode),
2692 CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
2693 session);
2694 else if (r == 2)
2695 ceph_check_caps(ceph_inode(inode),
2696 CHECK_CAPS_NODELAY,
2697 session);
2698 break;
2699 2700
2700 case CEPH_CAP_OP_FLUSH_ACK: 2701 case CEPH_CAP_OP_FLUSH_ACK:
2701 handle_cap_flush_ack(inode, tid, h, session, cap); 2702 handle_cap_flush_ack(inode, tid, h, session, cap);
@@ -2713,9 +2714,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2713 2714
2714done: 2715done:
2715 mutex_unlock(&session->s_mutex); 2716 mutex_unlock(&session->s_mutex);
2716 2717done_unlocked:
2717 if (check_caps)
2718 ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, NULL);
2719 if (inode) 2718 if (inode)
2720 iput(inode); 2719 iput(inode);
2721 return; 2720 return;
@@ -2838,11 +2837,18 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
2838 struct ceph_cap *cap; 2837 struct ceph_cap *cap;
2839 struct ceph_mds_request_release *rel = *p; 2838 struct ceph_mds_request_release *rel = *p;
2840 int ret = 0; 2839 int ret = 0;
2841 2840 int used = 0;
2842 dout("encode_inode_release %p mds%d drop %s unless %s\n", inode,
2843 mds, ceph_cap_string(drop), ceph_cap_string(unless));
2844 2841
2845 spin_lock(&inode->i_lock); 2842 spin_lock(&inode->i_lock);
2843 used = __ceph_caps_used(ci);
2844
2845 dout("encode_inode_release %p mds%d used %s drop %s unless %s\n", inode,
2846 mds, ceph_cap_string(used), ceph_cap_string(drop),
2847 ceph_cap_string(unless));
2848
2849 /* only drop unused caps */
2850 drop &= ~used;
2851
2846 cap = __get_cap_for_mds(ci, mds); 2852 cap = __get_cap_for_mds(ci, mds);
2847 if (cap && __cap_is_valid(cap)) { 2853 if (cap && __cap_is_valid(cap)) {
2848 if (force || 2854 if (force ||
diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c
index 291ac288e791..f704b3b62424 100644
--- a/fs/ceph/crypto.c
+++ b/fs/ceph/crypto.c
@@ -3,6 +3,7 @@
3 3
4#include <linux/err.h> 4#include <linux/err.h>
5#include <linux/scatterlist.h> 5#include <linux/scatterlist.h>
6#include <linux/slab.h>
6#include <crypto/hash.h> 7#include <crypto/hash.h>
7 8
8#include "crypto.h" 9#include "crypto.h"
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index e159f1415110..f7048da92acc 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -1,6 +1,7 @@
1#include "ceph_debug.h" 1#include "ceph_debug.h"
2 2
3#include <linux/device.h> 3#include <linux/device.h>
4#include <linux/slab.h>
4#include <linux/module.h> 5#include <linux/module.h>
5#include <linux/ctype.h> 6#include <linux/ctype.h>
6#include <linux/debugfs.h> 7#include <linux/debugfs.h>
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 5107384ee029..7261dc6c2ead 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -3,6 +3,7 @@
3#include <linux/spinlock.h> 3#include <linux/spinlock.h>
4#include <linux/fs_struct.h> 4#include <linux/fs_struct.h>
5#include <linux/namei.h> 5#include <linux/namei.h>
6#include <linux/slab.h>
6#include <linux/sched.h> 7#include <linux/sched.h>
7 8
8#include "super.h" 9#include "super.h"
@@ -288,8 +289,10 @@ more:
288 CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR; 289 CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;
289 290
290 /* discard old result, if any */ 291 /* discard old result, if any */
291 if (fi->last_readdir) 292 if (fi->last_readdir) {
292 ceph_mdsc_put_request(fi->last_readdir); 293 ceph_mdsc_put_request(fi->last_readdir);
294 fi->last_readdir = NULL;
295 }
293 296
294 /* requery frag tree, as the frag topology may have changed */ 297 /* requery frag tree, as the frag topology may have changed */
295 frag = ceph_choose_frag(ceph_inode(inode), frag, NULL, NULL); 298 frag = ceph_choose_frag(ceph_inode(inode), frag, NULL, NULL);
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index fc68e39cbad6..9d67572fb328 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -1,6 +1,7 @@
1#include "ceph_debug.h" 1#include "ceph_debug.h"
2 2
3#include <linux/exportfs.h> 3#include <linux/exportfs.h>
4#include <linux/slab.h>
4#include <asm/unaligned.h> 5#include <asm/unaligned.h>
5 6
6#include "super.h" 7#include "super.h"
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 5d2af8464f6a..4add3d5da2c1 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1,6 +1,7 @@
1#include "ceph_debug.h" 1#include "ceph_debug.h"
2 2
3#include <linux/sched.h> 3#include <linux/sched.h>
4#include <linux/slab.h>
4#include <linux/file.h> 5#include <linux/file.h>
5#include <linux/namei.h> 6#include <linux/namei.h>
6#include <linux/writeback.h> 7#include <linux/writeback.h>
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 7abe1aed819b..aca82d55cc53 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -378,6 +378,22 @@ void ceph_destroy_inode(struct inode *inode)
378 378
379 ceph_queue_caps_release(inode); 379 ceph_queue_caps_release(inode);
380 380
381 /*
382 * we may still have a snap_realm reference if there are stray
383 * caps in i_cap_exporting_issued or i_snap_caps.
384 */
385 if (ci->i_snap_realm) {
386 struct ceph_mds_client *mdsc =
387 &ceph_client(ci->vfs_inode.i_sb)->mdsc;
388 struct ceph_snap_realm *realm = ci->i_snap_realm;
389
390 dout(" dropping residual ref to snap realm %p\n", realm);
391 spin_lock(&realm->inodes_with_caps_lock);
392 list_del_init(&ci->i_snap_realm_item);
393 spin_unlock(&realm->inodes_with_caps_lock);
394 ceph_put_snap_realm(mdsc, realm);
395 }
396
381 kfree(ci->i_symlink); 397 kfree(ci->i_symlink);
382 while ((n = rb_first(&ci->i_fragtree)) != NULL) { 398 while ((n = rb_first(&ci->i_fragtree)) != NULL) {
383 frag = rb_entry(n, struct ceph_inode_frag, node); 399 frag = rb_entry(n, struct ceph_inode_frag, node);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a2600101ec22..60a9a4ae47be 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1,6 +1,7 @@
1#include "ceph_debug.h" 1#include "ceph_debug.h"
2 2
3#include <linux/wait.h> 3#include <linux/wait.h>
4#include <linux/slab.h>
4#include <linux/sched.h> 5#include <linux/sched.h>
5 6
6#include "mds_client.h" 7#include "mds_client.h"
@@ -328,6 +329,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
328 struct ceph_mds_session *s; 329 struct ceph_mds_session *s;
329 330
330 s = kzalloc(sizeof(*s), GFP_NOFS); 331 s = kzalloc(sizeof(*s), GFP_NOFS);
332 if (!s)
333 return ERR_PTR(-ENOMEM);
331 s->s_mdsc = mdsc; 334 s->s_mdsc = mdsc;
332 s->s_mds = mds; 335 s->s_mds = mds;
333 s->s_state = CEPH_MDS_SESSION_NEW; 336 s->s_state = CEPH_MDS_SESSION_NEW;
@@ -529,7 +532,7 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
529{ 532{
530 dout("__unregister_request %p tid %lld\n", req, req->r_tid); 533 dout("__unregister_request %p tid %lld\n", req, req->r_tid);
531 rb_erase(&req->r_node, &mdsc->request_tree); 534 rb_erase(&req->r_node, &mdsc->request_tree);
532 ceph_mdsc_put_request(req); 535 RB_CLEAR_NODE(&req->r_node);
533 536
534 if (req->r_unsafe_dir) { 537 if (req->r_unsafe_dir) {
535 struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir); 538 struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir);
@@ -538,6 +541,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
538 list_del_init(&req->r_unsafe_dir_item); 541 list_del_init(&req->r_unsafe_dir_item);
539 spin_unlock(&ci->i_unsafe_lock); 542 spin_unlock(&ci->i_unsafe_lock);
540 } 543 }
544
545 ceph_mdsc_put_request(req);
541} 546}
542 547
543/* 548/*
@@ -862,6 +867,7 @@ static int send_renew_caps(struct ceph_mds_client *mdsc,
862 if (time_after_eq(jiffies, session->s_cap_ttl) && 867 if (time_after_eq(jiffies, session->s_cap_ttl) &&
863 time_after_eq(session->s_cap_ttl, session->s_renew_requested)) 868 time_after_eq(session->s_cap_ttl, session->s_renew_requested))
864 pr_info("mds%d caps stale\n", session->s_mds); 869 pr_info("mds%d caps stale\n", session->s_mds);
870 session->s_renew_requested = jiffies;
865 871
866 /* do not try to renew caps until a recovering mds has reconnected 872 /* do not try to renew caps until a recovering mds has reconnected
867 * with its clients. */ 873 * with its clients. */
@@ -874,7 +880,6 @@ static int send_renew_caps(struct ceph_mds_client *mdsc,
874 880
875 dout("send_renew_caps to mds%d (%s)\n", session->s_mds, 881 dout("send_renew_caps to mds%d (%s)\n", session->s_mds,
876 ceph_mds_state_name(state)); 882 ceph_mds_state_name(state));
877 session->s_renew_requested = jiffies;
878 msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, 883 msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS,
879 ++session->s_renew_seq); 884 ++session->s_renew_seq);
880 if (IS_ERR(msg)) 885 if (IS_ERR(msg))
@@ -1566,8 +1571,13 @@ static int __do_request(struct ceph_mds_client *mdsc,
1566 1571
1567 /* get, open session */ 1572 /* get, open session */
1568 session = __ceph_lookup_mds_session(mdsc, mds); 1573 session = __ceph_lookup_mds_session(mdsc, mds);
1569 if (!session) 1574 if (!session) {
1570 session = register_session(mdsc, mds); 1575 session = register_session(mdsc, mds);
1576 if (IS_ERR(session)) {
1577 err = PTR_ERR(session);
1578 goto finish;
1579 }
1580 }
1571 dout("do_request mds%d session %p state %s\n", mds, session, 1581 dout("do_request mds%d session %p state %s\n", mds, session,
1572 session_state_name(session->s_state)); 1582 session_state_name(session->s_state));
1573 if (session->s_state != CEPH_MDS_SESSION_OPEN && 1583 if (session->s_state != CEPH_MDS_SESSION_OPEN &&
@@ -1770,7 +1780,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
1770 dout("handle_reply %p\n", req); 1780 dout("handle_reply %p\n", req);
1771 1781
1772 /* correct session? */ 1782 /* correct session? */
1773 if (!req->r_session && req->r_session != session) { 1783 if (req->r_session != session) {
1774 pr_err("mdsc_handle_reply got %llu on session mds%d" 1784 pr_err("mdsc_handle_reply got %llu on session mds%d"
1775 " not mds%d\n", tid, session->s_mds, 1785 " not mds%d\n", tid, session->s_mds,
1776 req->r_session ? req->r_session->s_mds : -1); 1786 req->r_session ? req->r_session->s_mds : -1);
@@ -2682,29 +2692,41 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
2682 */ 2692 */
2683static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid) 2693static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid)
2684{ 2694{
2685 struct ceph_mds_request *req = NULL; 2695 struct ceph_mds_request *req = NULL, *nextreq;
2686 struct rb_node *n; 2696 struct rb_node *n;
2687 2697
2688 mutex_lock(&mdsc->mutex); 2698 mutex_lock(&mdsc->mutex);
2689 dout("wait_unsafe_requests want %lld\n", want_tid); 2699 dout("wait_unsafe_requests want %lld\n", want_tid);
2700restart:
2690 req = __get_oldest_req(mdsc); 2701 req = __get_oldest_req(mdsc);
2691 while (req && req->r_tid <= want_tid) { 2702 while (req && req->r_tid <= want_tid) {
2703 /* find next request */
2704 n = rb_next(&req->r_node);
2705 if (n)
2706 nextreq = rb_entry(n, struct ceph_mds_request, r_node);
2707 else
2708 nextreq = NULL;
2692 if ((req->r_op & CEPH_MDS_OP_WRITE)) { 2709 if ((req->r_op & CEPH_MDS_OP_WRITE)) {
2693 /* write op */ 2710 /* write op */
2694 ceph_mdsc_get_request(req); 2711 ceph_mdsc_get_request(req);
2712 if (nextreq)
2713 ceph_mdsc_get_request(nextreq);
2695 mutex_unlock(&mdsc->mutex); 2714 mutex_unlock(&mdsc->mutex);
2696 dout("wait_unsafe_requests wait on %llu (want %llu)\n", 2715 dout("wait_unsafe_requests wait on %llu (want %llu)\n",
2697 req->r_tid, want_tid); 2716 req->r_tid, want_tid);
2698 wait_for_completion(&req->r_safe_completion); 2717 wait_for_completion(&req->r_safe_completion);
2699 mutex_lock(&mdsc->mutex); 2718 mutex_lock(&mdsc->mutex);
2700 n = rb_next(&req->r_node);
2701 ceph_mdsc_put_request(req); 2719 ceph_mdsc_put_request(req);
2702 } else { 2720 if (!nextreq)
2703 n = rb_next(&req->r_node); 2721 break; /* next dne before, so we're done! */
2722 if (RB_EMPTY_NODE(&nextreq->r_node)) {
2723 /* next request was removed from tree */
2724 ceph_mdsc_put_request(nextreq);
2725 goto restart;
2726 }
2727 ceph_mdsc_put_request(nextreq); /* won't go away */
2704 } 2728 }
2705 if (!n) 2729 req = nextreq;
2706 break;
2707 req = rb_entry(n, struct ceph_mds_request, r_node);
2708 } 2730 }
2709 mutex_unlock(&mdsc->mutex); 2731 mutex_unlock(&mdsc->mutex);
2710 dout("wait_unsafe_requests done\n"); 2732 dout("wait_unsafe_requests done\n");
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 781656a49bf8..8f1715ffbe4b 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -6,6 +6,7 @@
6#include <linux/inet.h> 6#include <linux/inet.h>
7#include <linux/kthread.h> 7#include <linux/kthread.h>
8#include <linux/net.h> 8#include <linux/net.h>
9#include <linux/slab.h>
9#include <linux/socket.h> 10#include <linux/socket.h>
10#include <linux/string.h> 11#include <linux/string.h>
11#include <net/tcp.h> 12#include <net/tcp.h>
@@ -366,6 +367,14 @@ void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr)
366} 367}
367 368
368/* 369/*
370 * return true if this connection ever successfully opened
371 */
372bool ceph_con_opened(struct ceph_connection *con)
373{
374 return con->connect_seq > 0;
375}
376
377/*
369 * generic get/put 378 * generic get/put
370 */ 379 */
371struct ceph_connection *ceph_con_get(struct ceph_connection *con) 380struct ceph_connection *ceph_con_get(struct ceph_connection *con)
@@ -830,13 +839,6 @@ static void prepare_read_connect(struct ceph_connection *con)
830 con->in_base_pos = 0; 839 con->in_base_pos = 0;
831} 840}
832 841
833static void prepare_read_connect_retry(struct ceph_connection *con)
834{
835 dout("prepare_read_connect_retry %p\n", con);
836 con->in_base_pos = strlen(CEPH_BANNER) + sizeof(con->actual_peer_addr)
837 + sizeof(con->peer_addr_for_me);
838}
839
840static void prepare_read_ack(struct ceph_connection *con) 842static void prepare_read_ack(struct ceph_connection *con)
841{ 843{
842 dout("prepare_read_ack %p\n", con); 844 dout("prepare_read_ack %p\n", con);
@@ -1146,7 +1148,7 @@ static int process_connect(struct ceph_connection *con)
1146 } 1148 }
1147 con->auth_retry = 1; 1149 con->auth_retry = 1;
1148 prepare_write_connect(con->msgr, con, 0); 1150 prepare_write_connect(con->msgr, con, 0);
1149 prepare_read_connect_retry(con); 1151 prepare_read_connect(con);
1150 break; 1152 break;
1151 1153
1152 case CEPH_MSGR_TAG_RESETSESSION: 1154 case CEPH_MSGR_TAG_RESETSESSION:
@@ -1843,8 +1845,6 @@ static void ceph_fault(struct ceph_connection *con)
1843 goto out; 1845 goto out;
1844 } 1846 }
1845 1847
1846 clear_bit(BUSY, &con->state); /* to avoid an improbable race */
1847
1848 mutex_lock(&con->mutex); 1848 mutex_lock(&con->mutex);
1849 if (test_bit(CLOSED, &con->state)) 1849 if (test_bit(CLOSED, &con->state))
1850 goto out_unlock; 1850 goto out_unlock;
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h
index 4caaa5911110..a343dae73cdc 100644
--- a/fs/ceph/messenger.h
+++ b/fs/ceph/messenger.h
@@ -223,6 +223,7 @@ extern void ceph_con_init(struct ceph_messenger *msgr,
223 struct ceph_connection *con); 223 struct ceph_connection *con);
224extern void ceph_con_open(struct ceph_connection *con, 224extern void ceph_con_open(struct ceph_connection *con,
225 struct ceph_entity_addr *addr); 225 struct ceph_entity_addr *addr);
226extern bool ceph_con_opened(struct ceph_connection *con);
226extern void ceph_con_close(struct ceph_connection *con); 227extern void ceph_con_close(struct ceph_connection *con);
227extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg); 228extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg);
228extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg); 229extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg);
diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c
index 890597c09d43..8fdc011ca956 100644
--- a/fs/ceph/mon_client.c
+++ b/fs/ceph/mon_client.c
@@ -1,6 +1,7 @@
1#include "ceph_debug.h" 1#include "ceph_debug.h"
2 2
3#include <linux/types.h> 3#include <linux/types.h>
4#include <linux/slab.h>
4#include <linux/random.h> 5#include <linux/random.h>
5#include <linux/sched.h> 6#include <linux/sched.h>
6 7
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index dbe63db9762f..c7b4dedaace6 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -413,11 +413,22 @@ static void remove_old_osds(struct ceph_osd_client *osdc, int remove_all)
413 */ 413 */
414static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) 414static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
415{ 415{
416 struct ceph_osd_request *req;
416 int ret = 0; 417 int ret = 0;
417 418
418 dout("__reset_osd %p osd%d\n", osd, osd->o_osd); 419 dout("__reset_osd %p osd%d\n", osd, osd->o_osd);
419 if (list_empty(&osd->o_requests)) { 420 if (list_empty(&osd->o_requests)) {
420 __remove_osd(osdc, osd); 421 __remove_osd(osdc, osd);
422 } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd],
423 &osd->o_con.peer_addr,
424 sizeof(osd->o_con.peer_addr)) == 0 &&
425 !ceph_con_opened(&osd->o_con)) {
426 dout(" osd addr hasn't changed and connection never opened,"
427 " letting msgr retry");
428 /* touch each r_stamp for handle_timeout()'s benefit */
429 list_for_each_entry(req, &osd->o_requests, r_osd_item)
430 req->r_stamp = jiffies;
431 ret = -EAGAIN;
421 } else { 432 } else {
422 ceph_con_close(&osd->o_con); 433 ceph_con_close(&osd->o_con);
423 ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]); 434 ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]);
@@ -633,7 +644,7 @@ static int __send_request(struct ceph_osd_client *osdc,
633 reqhead->flags |= cpu_to_le32(req->r_flags); /* e.g., RETRY */ 644 reqhead->flags |= cpu_to_le32(req->r_flags); /* e.g., RETRY */
634 reqhead->reassert_version = req->r_reassert_version; 645 reqhead->reassert_version = req->r_reassert_version;
635 646
636 req->r_sent_stamp = jiffies; 647 req->r_stamp = jiffies;
637 list_move_tail(&osdc->req_lru, &req->r_req_lru_item); 648 list_move_tail(&osdc->req_lru, &req->r_req_lru_item);
638 649
639 ceph_msg_get(req->r_request); /* send consumes a ref */ 650 ceph_msg_get(req->r_request); /* send consumes a ref */
@@ -660,7 +671,7 @@ static void handle_timeout(struct work_struct *work)
660 unsigned long timeout = osdc->client->mount_args->osd_timeout * HZ; 671 unsigned long timeout = osdc->client->mount_args->osd_timeout * HZ;
661 unsigned long keepalive = 672 unsigned long keepalive =
662 osdc->client->mount_args->osd_keepalive_timeout * HZ; 673 osdc->client->mount_args->osd_keepalive_timeout * HZ;
663 unsigned long last_sent = 0; 674 unsigned long last_stamp = 0;
664 struct rb_node *p; 675 struct rb_node *p;
665 struct list_head slow_osds; 676 struct list_head slow_osds;
666 677
@@ -697,12 +708,12 @@ static void handle_timeout(struct work_struct *work)
697 req = list_entry(osdc->req_lru.next, struct ceph_osd_request, 708 req = list_entry(osdc->req_lru.next, struct ceph_osd_request,
698 r_req_lru_item); 709 r_req_lru_item);
699 710
700 if (time_before(jiffies, req->r_sent_stamp + timeout)) 711 if (time_before(jiffies, req->r_stamp + timeout))
701 break; 712 break;
702 713
703 BUG_ON(req == last_req && req->r_sent_stamp == last_sent); 714 BUG_ON(req == last_req && req->r_stamp == last_stamp);
704 last_req = req; 715 last_req = req;
705 last_sent = req->r_sent_stamp; 716 last_stamp = req->r_stamp;
706 717
707 osd = req->r_osd; 718 osd = req->r_osd;
708 BUG_ON(!osd); 719 BUG_ON(!osd);
@@ -718,7 +729,7 @@ static void handle_timeout(struct work_struct *work)
718 */ 729 */
719 INIT_LIST_HEAD(&slow_osds); 730 INIT_LIST_HEAD(&slow_osds);
720 list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) { 731 list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) {
721 if (time_before(jiffies, req->r_sent_stamp + keepalive)) 732 if (time_before(jiffies, req->r_stamp + keepalive))
722 break; 733 break;
723 734
724 osd = req->r_osd; 735 osd = req->r_osd;
@@ -862,7 +873,9 @@ static int __kick_requests(struct ceph_osd_client *osdc,
862 873
863 dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1); 874 dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1);
864 if (kickosd) { 875 if (kickosd) {
865 __reset_osd(osdc, kickosd); 876 err = __reset_osd(osdc, kickosd);
877 if (err == -EAGAIN)
878 return 1;
866 } else { 879 } else {
867 for (p = rb_first(&osdc->osds); p; p = n) { 880 for (p = rb_first(&osdc->osds); p; p = n) {
868 struct ceph_osd *osd = 881 struct ceph_osd *osd =
@@ -913,7 +926,7 @@ static int __kick_requests(struct ceph_osd_client *osdc,
913 926
914kick: 927kick:
915 dout("kicking %p tid %llu osd%d\n", req, req->r_tid, 928 dout("kicking %p tid %llu osd%d\n", req, req->r_tid,
916 req->r_osd->o_osd); 929 req->r_osd ? req->r_osd->o_osd : -1);
917 req->r_flags |= CEPH_OSD_FLAG_RETRY; 930 req->r_flags |= CEPH_OSD_FLAG_RETRY;
918 err = __send_request(osdc, req); 931 err = __send_request(osdc, req);
919 if (err) { 932 if (err) {
diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h
index 1b1a3ca43afc..b0759911e7c3 100644
--- a/fs/ceph/osd_client.h
+++ b/fs/ceph/osd_client.h
@@ -70,7 +70,7 @@ struct ceph_osd_request {
70 70
71 char r_oid[40]; /* object name */ 71 char r_oid[40]; /* object name */
72 int r_oid_len; 72 int r_oid_len;
73 unsigned long r_sent_stamp; 73 unsigned long r_stamp; /* send OR check time */
74 bool r_resend; /* msg send failed, needs retry */ 74 bool r_resend; /* msg send failed, needs retry */
75 75
76 struct ceph_file_layout r_file_layout; 76 struct ceph_file_layout r_file_layout;
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index b83f2692b835..21c6623c4b07 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -1,4 +1,7 @@
1 1
2#include "ceph_debug.h"
3
4#include <linux/slab.h>
2#include <asm/div64.h> 5#include <asm/div64.h>
3 6
4#include "super.h" 7#include "super.h"
@@ -6,7 +9,6 @@
6#include "crush/hash.h" 9#include "crush/hash.h"
7#include "crush/mapper.h" 10#include "crush/mapper.h"
8#include "decode.h" 11#include "decode.h"
9#include "ceph_debug.h"
10 12
11char *ceph_osdmap_state_str(char *str, int len, int state) 13char *ceph_osdmap_state_str(char *str, int len, int state)
12{ 14{
@@ -480,6 +482,14 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id)
480 return NULL; 482 return NULL;
481} 483}
482 484
485void __decode_pool(void **p, struct ceph_pg_pool_info *pi)
486{
487 ceph_decode_copy(p, &pi->v, sizeof(pi->v));
488 calc_pg_masks(pi);
489 *p += le32_to_cpu(pi->v.num_snaps) * sizeof(u64);
490 *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2;
491}
492
483/* 493/*
484 * decode a full map. 494 * decode a full map.
485 */ 495 */
@@ -526,12 +536,8 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
526 ev, CEPH_PG_POOL_VERSION); 536 ev, CEPH_PG_POOL_VERSION);
527 goto bad; 537 goto bad;
528 } 538 }
529 ceph_decode_copy(p, &pi->v, sizeof(pi->v)); 539 __decode_pool(p, pi);
530 __insert_pg_pool(&map->pg_pools, pi); 540 __insert_pg_pool(&map->pg_pools, pi);
531 calc_pg_masks(pi);
532 *p += le32_to_cpu(pi->v.num_snaps) * sizeof(u64);
533 *p += le32_to_cpu(pi->v.num_removed_snap_intervals)
534 * sizeof(u64) * 2;
535 } 541 }
536 ceph_decode_32_safe(p, end, map->pool_max, bad); 542 ceph_decode_32_safe(p, end, map->pool_max, bad);
537 543
@@ -714,8 +720,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
714 pi->id = pool; 720 pi->id = pool;
715 __insert_pg_pool(&map->pg_pools, pi); 721 __insert_pg_pool(&map->pg_pools, pi);
716 } 722 }
717 ceph_decode_copy(p, &pi->v, sizeof(pi->v)); 723 __decode_pool(p, pi);
718 calc_pg_masks(pi);
719 } 724 }
720 725
721 /* old_pool */ 726 /* old_pool */
diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c
index 370e93695474..5f8dbf7c745a 100644
--- a/fs/ceph/pagelist.c
+++ b/fs/ceph/pagelist.c
@@ -1,4 +1,5 @@
1 1
2#include <linux/gfp.h>
2#include <linux/pagemap.h> 3#include <linux/pagemap.h>
3#include <linux/highmem.h> 4#include <linux/highmem.h>
4 5
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index bf2a5f3846a4..e6f9bc57d472 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -1,6 +1,7 @@
1#include "ceph_debug.h" 1#include "ceph_debug.h"
2 2
3#include <linux/sort.h> 3#include <linux/sort.h>
4#include <linux/slab.h>
4 5
5#include "super.h" 6#include "super.h"
6#include "decode.h" 7#include "decode.h"
@@ -314,9 +315,9 @@ static int build_snap_context(struct ceph_snap_realm *realm)
314 because rebuild_snap_realms() works _downward_ in 315 because rebuild_snap_realms() works _downward_ in
315 hierarchy after each update.) */ 316 hierarchy after each update.) */
316 if (realm->cached_context && 317 if (realm->cached_context &&
317 realm->cached_context->seq <= realm->seq && 318 realm->cached_context->seq == realm->seq &&
318 (!parent || 319 (!parent ||
319 realm->cached_context->seq <= parent->cached_context->seq)) { 320 realm->cached_context->seq >= parent->cached_context->seq)) {
320 dout("build_snap_context %llx %p: %p seq %lld (%d snaps)" 321 dout("build_snap_context %llx %p: %p seq %lld (%d snaps)"
321 " (unchanged)\n", 322 " (unchanged)\n",
322 realm->ino, realm, realm->cached_context, 323 realm->ino, realm, realm->cached_context,
@@ -818,7 +819,9 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
818 * queued (again) by ceph_update_snap_trace() 819 * queued (again) by ceph_update_snap_trace()
819 * below. Queue it _now_, under the old context. 820 * below. Queue it _now_, under the old context.
820 */ 821 */
822 spin_lock(&realm->inodes_with_caps_lock);
821 list_del_init(&ci->i_snap_realm_item); 823 list_del_init(&ci->i_snap_realm_item);
824 spin_unlock(&realm->inodes_with_caps_lock);
822 spin_unlock(&inode->i_lock); 825 spin_unlock(&inode->i_lock);
823 826
824 ceph_queue_cap_snap(ci, 827 ceph_queue_cap_snap(ci,
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 4290a6e860b0..75d02eaa1279 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -11,6 +11,7 @@
11#include <linux/rwsem.h> 11#include <linux/rwsem.h>
12#include <linux/sched.h> 12#include <linux/sched.h>
13#include <linux/seq_file.h> 13#include <linux/seq_file.h>
14#include <linux/slab.h>
14#include <linux/statfs.h> 15#include <linux/statfs.h>
15#include <linux/string.h> 16#include <linux/string.h>
16#include <linux/version.h> 17#include <linux/version.h>
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 65d12036b670..ca702c67bc66 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -12,6 +12,7 @@
12#include <linux/pagemap.h> 12#include <linux/pagemap.h>
13#include <linux/wait.h> 13#include <linux/wait.h>
14#include <linux/writeback.h> 14#include <linux/writeback.h>
15#include <linux/slab.h>
15 16
16#include "types.h" 17#include "types.h"
17#include "messenger.h" 18#include "messenger.h"
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 37d6ce645691..2845422907fc 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -3,6 +3,7 @@
3#include "decode.h" 3#include "decode.h"
4 4
5#include <linux/xattr.h> 5#include <linux/xattr.h>
6#include <linux/slab.h>
6 7
7static bool ceph_is_valid_xattr(const char *name) 8static bool ceph_is_valid_xattr(const char *name)
8{ 9{