diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-03-29 17:42:25 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-03-29 17:42:25 -0400 |
commit | 9f321603724be7386ea39ea41fd885954db60a4a (patch) | |
tree | efd64c26c2fb2698ecd95c2f10dc1016b45ba4a4 | |
parent | 9d54e2c0b0a03b0f05fc4f988323c858ec9d7740 (diff) | |
parent | 82593f87b6c1922a8f8317bb165c6c7794fa4639 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (28 commits)
ceph: update discussion list address in MAINTAINERS
ceph: some documentations fixes
ceph: fix use after free on mds __unregister_request
ceph: avoid loaded term 'OSD' in documention
ceph: fix possible double-free of mds request reference
ceph: fix session check on mds reply
ceph: handle kmalloc() failure
ceph: propagate mds session allocation failures to caller
ceph: make write_begin wait propagate ERESTARTSYS
ceph: fix snap rebuild condition
ceph: avoid reopening osd connections when address hasn't changed
ceph: rename r_sent_stamp r_stamp
ceph: fix connection fault con_work reentrancy problem
ceph: prevent dup stale messages to console for restarting mds
ceph: fix pg pool decoding from incremental osdmap update
ceph: fix mds sync() race with completing requests
ceph: only release unused caps with mds requests
ceph: clean up handle_cap_grant, handle_caps wrt session mutex
ceph: fix session locking in handle_caps, ceph_check_caps
ceph: drop unnecessary WARN_ON in caps migration
...
-rw-r--r-- | Documentation/filesystems/00-INDEX | 2 | ||||
-rw-r--r-- | Documentation/filesystems/ceph.txt | 11 | ||||
-rw-r--r-- | MAINTAINERS | 2 | ||||
-rw-r--r-- | fs/ceph/addr.c | 10 | ||||
-rw-r--r-- | fs/ceph/auth_x.c | 53 | ||||
-rw-r--r-- | fs/ceph/caps.c | 73 | ||||
-rw-r--r-- | fs/ceph/dir.c | 4 | ||||
-rw-r--r-- | fs/ceph/inode.c | 16 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 43 | ||||
-rw-r--r-- | fs/ceph/messenger.c | 19 | ||||
-rw-r--r-- | fs/ceph/messenger.h | 1 | ||||
-rw-r--r-- | fs/ceph/osd_client.c | 29 | ||||
-rw-r--r-- | fs/ceph/osd_client.h | 2 | ||||
-rw-r--r-- | fs/ceph/osdmap.c | 17 | ||||
-rw-r--r-- | fs/ceph/snap.c | 6 |
15 files changed, 191 insertions, 97 deletions
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index 3bae418c6ad3..4303614b5add 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX | |||
@@ -16,6 +16,8 @@ befs.txt | |||
16 | - information about the BeOS filesystem for Linux. | 16 | - information about the BeOS filesystem for Linux. |
17 | bfs.txt | 17 | bfs.txt |
18 | - info for the SCO UnixWare Boot Filesystem (BFS). | 18 | - info for the SCO UnixWare Boot Filesystem (BFS). |
19 | ceph.txt | ||
20 | - info for the Ceph Distributed File System | ||
19 | cifs.txt | 21 | cifs.txt |
20 | - description of the CIFS filesystem. | 22 | - description of the CIFS filesystem. |
21 | coda.txt | 23 | coda.txt |
diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph.txt index 6e03917316bd..0660c9f5deef 100644 --- a/Documentation/filesystems/ceph.txt +++ b/Documentation/filesystems/ceph.txt | |||
@@ -8,7 +8,7 @@ Basic features include: | |||
8 | 8 | ||
9 | * POSIX semantics | 9 | * POSIX semantics |
10 | * Seamless scaling from 1 to many thousands of nodes | 10 | * Seamless scaling from 1 to many thousands of nodes |
11 | * High availability and reliability. No single points of failure. | 11 | * High availability and reliability. No single point of failure. |
12 | * N-way replication of data across storage nodes | 12 | * N-way replication of data across storage nodes |
13 | * Fast recovery from node failures | 13 | * Fast recovery from node failures |
14 | * Automatic rebalancing of data on node addition/removal | 14 | * Automatic rebalancing of data on node addition/removal |
@@ -94,7 +94,7 @@ Mount Options | |||
94 | 94 | ||
95 | wsize=X | 95 | wsize=X |
96 | Specify the maximum write size in bytes. By default there is no | 96 | Specify the maximum write size in bytes. By default there is no |
97 | maximu. Ceph will normally size writes based on the file stripe | 97 | maximum. Ceph will normally size writes based on the file stripe |
98 | size. | 98 | size. |
99 | 99 | ||
100 | rsize=X | 100 | rsize=X |
@@ -115,7 +115,7 @@ Mount Options | |||
115 | number of entries in that directory. | 115 | number of entries in that directory. |
116 | 116 | ||
117 | nocrc | 117 | nocrc |
118 | Disable CRC32C calculation for data writes. If set, the OSD | 118 | Disable CRC32C calculation for data writes. If set, the storage node |
119 | must rely on TCP's error correction to detect data corruption | 119 | must rely on TCP's error correction to detect data corruption |
120 | in the data payload. | 120 | in the data payload. |
121 | 121 | ||
@@ -133,7 +133,8 @@ For more information on Ceph, see the home page at | |||
133 | http://ceph.newdream.net/ | 133 | http://ceph.newdream.net/ |
134 | 134 | ||
135 | The Linux kernel client source tree is available at | 135 | The Linux kernel client source tree is available at |
136 | git://ceph.newdream.net/linux-ceph-client.git | 136 | git://ceph.newdream.net/git/ceph-client.git |
137 | git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git | ||
137 | 138 | ||
138 | and the source for the full system is at | 139 | and the source for the full system is at |
139 | git://ceph.newdream.net/ceph.git | 140 | git://ceph.newdream.net/git/ceph.git |
diff --git a/MAINTAINERS b/MAINTAINERS index 1a203f9626f6..088bd41ac71e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -1443,7 +1443,7 @@ F: arch/powerpc/platforms/cell/ | |||
1443 | 1443 | ||
1444 | CEPH DISTRIBUTED FILE SYSTEM CLIENT | 1444 | CEPH DISTRIBUTED FILE SYSTEM CLIENT |
1445 | M: Sage Weil <sage@newdream.net> | 1445 | M: Sage Weil <sage@newdream.net> |
1446 | L: ceph-devel@lists.sourceforge.net | 1446 | L: ceph-devel@vger.kernel.org |
1447 | W: http://ceph.newdream.net/ | 1447 | W: http://ceph.newdream.net/ |
1448 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git | 1448 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git |
1449 | S: Supported | 1449 | S: Supported |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 23bb0ceabe31..ce8ef6107727 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -919,6 +919,10 @@ static int context_is_writeable_or_written(struct inode *inode, | |||
919 | /* | 919 | /* |
920 | * We are only allowed to write into/dirty the page if the page is | 920 | * We are only allowed to write into/dirty the page if the page is |
921 | * clean, or already dirty within the same snap context. | 921 | * clean, or already dirty within the same snap context. |
922 | * | ||
923 | * called with page locked. | ||
924 | * return success with page locked, | ||
925 | * or any failure (incl -EAGAIN) with page unlocked. | ||
922 | */ | 926 | */ |
923 | static int ceph_update_writeable_page(struct file *file, | 927 | static int ceph_update_writeable_page(struct file *file, |
924 | loff_t pos, unsigned len, | 928 | loff_t pos, unsigned len, |
@@ -961,9 +965,11 @@ retry_locked: | |||
961 | snapc = ceph_get_snap_context((void *)page->private); | 965 | snapc = ceph_get_snap_context((void *)page->private); |
962 | unlock_page(page); | 966 | unlock_page(page); |
963 | ceph_queue_writeback(inode); | 967 | ceph_queue_writeback(inode); |
964 | wait_event_interruptible(ci->i_cap_wq, | 968 | r = wait_event_interruptible(ci->i_cap_wq, |
965 | context_is_writeable_or_written(inode, snapc)); | 969 | context_is_writeable_or_written(inode, snapc)); |
966 | ceph_put_snap_context(snapc); | 970 | ceph_put_snap_context(snapc); |
971 | if (r == -ERESTARTSYS) | ||
972 | return r; | ||
967 | return -EAGAIN; | 973 | return -EAGAIN; |
968 | } | 974 | } |
969 | 975 | ||
@@ -1035,7 +1041,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, | |||
1035 | int r; | 1041 | int r; |
1036 | 1042 | ||
1037 | do { | 1043 | do { |
1038 | /* get a page*/ | 1044 | /* get a page */ |
1039 | page = grab_cache_page_write_begin(mapping, index, 0); | 1045 | page = grab_cache_page_write_begin(mapping, index, 0); |
1040 | if (!page) | 1046 | if (!page) |
1041 | return -ENOMEM; | 1047 | return -ENOMEM; |
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index f0318427b6da..8d8a84964763 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c | |||
@@ -28,6 +28,12 @@ static int ceph_x_is_authenticated(struct ceph_auth_client *ac) | |||
28 | return (ac->want_keys & xi->have_keys) == ac->want_keys; | 28 | return (ac->want_keys & xi->have_keys) == ac->want_keys; |
29 | } | 29 | } |
30 | 30 | ||
31 | static int ceph_x_encrypt_buflen(int ilen) | ||
32 | { | ||
33 | return sizeof(struct ceph_x_encrypt_header) + ilen + 16 + | ||
34 | sizeof(u32); | ||
35 | } | ||
36 | |||
31 | static int ceph_x_encrypt(struct ceph_crypto_key *secret, | 37 | static int ceph_x_encrypt(struct ceph_crypto_key *secret, |
32 | void *ibuf, int ilen, void *obuf, size_t olen) | 38 | void *ibuf, int ilen, void *obuf, size_t olen) |
33 | { | 39 | { |
@@ -150,6 +156,11 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, | |||
150 | struct timespec validity; | 156 | struct timespec validity; |
151 | struct ceph_crypto_key old_key; | 157 | struct ceph_crypto_key old_key; |
152 | void *tp, *tpend; | 158 | void *tp, *tpend; |
159 | struct ceph_timespec new_validity; | ||
160 | struct ceph_crypto_key new_session_key; | ||
161 | struct ceph_buffer *new_ticket_blob; | ||
162 | unsigned long new_expires, new_renew_after; | ||
163 | u64 new_secret_id; | ||
153 | 164 | ||
154 | ceph_decode_need(&p, end, sizeof(u32) + 1, bad); | 165 | ceph_decode_need(&p, end, sizeof(u32) + 1, bad); |
155 | 166 | ||
@@ -182,16 +193,16 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, | |||
182 | goto bad; | 193 | goto bad; |
183 | 194 | ||
184 | memcpy(&old_key, &th->session_key, sizeof(old_key)); | 195 | memcpy(&old_key, &th->session_key, sizeof(old_key)); |
185 | ret = ceph_crypto_key_decode(&th->session_key, &dp, dend); | 196 | ret = ceph_crypto_key_decode(&new_session_key, &dp, dend); |
186 | if (ret) | 197 | if (ret) |
187 | goto out; | 198 | goto out; |
188 | 199 | ||
189 | ceph_decode_copy(&dp, &th->validity, sizeof(th->validity)); | 200 | ceph_decode_copy(&dp, &new_validity, sizeof(new_validity)); |
190 | ceph_decode_timespec(&validity, &th->validity); | 201 | ceph_decode_timespec(&validity, &new_validity); |
191 | th->expires = get_seconds() + validity.tv_sec; | 202 | new_expires = get_seconds() + validity.tv_sec; |
192 | th->renew_after = th->expires - (validity.tv_sec / 4); | 203 | new_renew_after = new_expires - (validity.tv_sec / 4); |
193 | dout(" expires=%lu renew_after=%lu\n", th->expires, | 204 | dout(" expires=%lu renew_after=%lu\n", new_expires, |
194 | th->renew_after); | 205 | new_renew_after); |
195 | 206 | ||
196 | /* ticket blob for service */ | 207 | /* ticket blob for service */ |
197 | ceph_decode_8_safe(&p, end, is_enc, bad); | 208 | ceph_decode_8_safe(&p, end, is_enc, bad); |
@@ -216,10 +227,21 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, | |||
216 | dout(" ticket blob is %d bytes\n", dlen); | 227 | dout(" ticket blob is %d bytes\n", dlen); |
217 | ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); | 228 | ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); |
218 | struct_v = ceph_decode_8(&tp); | 229 | struct_v = ceph_decode_8(&tp); |
219 | th->secret_id = ceph_decode_64(&tp); | 230 | new_secret_id = ceph_decode_64(&tp); |
220 | ret = ceph_decode_buffer(&th->ticket_blob, &tp, tpend); | 231 | ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend); |
221 | if (ret) | 232 | if (ret) |
222 | goto out; | 233 | goto out; |
234 | |||
235 | /* all is well, update our ticket */ | ||
236 | ceph_crypto_key_destroy(&th->session_key); | ||
237 | if (th->ticket_blob) | ||
238 | ceph_buffer_put(th->ticket_blob); | ||
239 | th->session_key = new_session_key; | ||
240 | th->ticket_blob = new_ticket_blob; | ||
241 | th->validity = new_validity; | ||
242 | th->secret_id = new_secret_id; | ||
243 | th->expires = new_expires; | ||
244 | th->renew_after = new_renew_after; | ||
223 | dout(" got ticket service %d (%s) secret_id %lld len %d\n", | 245 | dout(" got ticket service %d (%s) secret_id %lld len %d\n", |
224 | type, ceph_entity_type_name(type), th->secret_id, | 246 | type, ceph_entity_type_name(type), th->secret_id, |
225 | (int)th->ticket_blob->vec.iov_len); | 247 | (int)th->ticket_blob->vec.iov_len); |
@@ -242,7 +264,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, | |||
242 | struct ceph_x_ticket_handler *th, | 264 | struct ceph_x_ticket_handler *th, |
243 | struct ceph_x_authorizer *au) | 265 | struct ceph_x_authorizer *au) |
244 | { | 266 | { |
245 | int len; | 267 | int maxlen; |
246 | struct ceph_x_authorize_a *msg_a; | 268 | struct ceph_x_authorize_a *msg_a; |
247 | struct ceph_x_authorize_b msg_b; | 269 | struct ceph_x_authorize_b msg_b; |
248 | void *p, *end; | 270 | void *p, *end; |
@@ -253,15 +275,15 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, | |||
253 | dout("build_authorizer for %s %p\n", | 275 | dout("build_authorizer for %s %p\n", |
254 | ceph_entity_type_name(th->service), au); | 276 | ceph_entity_type_name(th->service), au); |
255 | 277 | ||
256 | len = sizeof(*msg_a) + sizeof(msg_b) + sizeof(u32) + | 278 | maxlen = sizeof(*msg_a) + sizeof(msg_b) + |
257 | ticket_blob_len + 16; | 279 | ceph_x_encrypt_buflen(ticket_blob_len); |
258 | dout(" need len %d\n", len); | 280 | dout(" need len %d\n", maxlen); |
259 | if (au->buf && au->buf->alloc_len < len) { | 281 | if (au->buf && au->buf->alloc_len < maxlen) { |
260 | ceph_buffer_put(au->buf); | 282 | ceph_buffer_put(au->buf); |
261 | au->buf = NULL; | 283 | au->buf = NULL; |
262 | } | 284 | } |
263 | if (!au->buf) { | 285 | if (!au->buf) { |
264 | au->buf = ceph_buffer_new(len, GFP_NOFS); | 286 | au->buf = ceph_buffer_new(maxlen, GFP_NOFS); |
265 | if (!au->buf) | 287 | if (!au->buf) |
266 | return -ENOMEM; | 288 | return -ENOMEM; |
267 | } | 289 | } |
@@ -296,6 +318,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, | |||
296 | au->buf->vec.iov_len = p - au->buf->vec.iov_base; | 318 | au->buf->vec.iov_len = p - au->buf->vec.iov_base; |
297 | dout(" built authorizer nonce %llx len %d\n", au->nonce, | 319 | dout(" built authorizer nonce %llx len %d\n", au->nonce, |
298 | (int)au->buf->vec.iov_len); | 320 | (int)au->buf->vec.iov_len); |
321 | BUG_ON(au->buf->vec.iov_len > maxlen); | ||
299 | return 0; | 322 | return 0; |
300 | 323 | ||
301 | out_buf: | 324 | out_buf: |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index db122bb357b8..7d0a0d0adc18 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -1407,6 +1407,7 @@ static int try_nonblocking_invalidate(struct inode *inode) | |||
1407 | */ | 1407 | */ |
1408 | void ceph_check_caps(struct ceph_inode_info *ci, int flags, | 1408 | void ceph_check_caps(struct ceph_inode_info *ci, int flags, |
1409 | struct ceph_mds_session *session) | 1409 | struct ceph_mds_session *session) |
1410 | __releases(session->s_mutex) | ||
1410 | { | 1411 | { |
1411 | struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); | 1412 | struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); |
1412 | struct ceph_mds_client *mdsc = &client->mdsc; | 1413 | struct ceph_mds_client *mdsc = &client->mdsc; |
@@ -1414,7 +1415,6 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, | |||
1414 | struct ceph_cap *cap; | 1415 | struct ceph_cap *cap; |
1415 | int file_wanted, used; | 1416 | int file_wanted, used; |
1416 | int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */ | 1417 | int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */ |
1417 | int drop_session_lock = session ? 0 : 1; | ||
1418 | int issued, implemented, want, retain, revoking, flushing = 0; | 1418 | int issued, implemented, want, retain, revoking, flushing = 0; |
1419 | int mds = -1; /* keep track of how far we've gone through i_caps list | 1419 | int mds = -1; /* keep track of how far we've gone through i_caps list |
1420 | to avoid an infinite loop on retry */ | 1420 | to avoid an infinite loop on retry */ |
@@ -1639,7 +1639,7 @@ ack: | |||
1639 | if (queue_invalidate) | 1639 | if (queue_invalidate) |
1640 | ceph_queue_invalidate(inode); | 1640 | ceph_queue_invalidate(inode); |
1641 | 1641 | ||
1642 | if (session && drop_session_lock) | 1642 | if (session) |
1643 | mutex_unlock(&session->s_mutex); | 1643 | mutex_unlock(&session->s_mutex); |
1644 | if (took_snap_rwsem) | 1644 | if (took_snap_rwsem) |
1645 | up_read(&mdsc->snap_rwsem); | 1645 | up_read(&mdsc->snap_rwsem); |
@@ -2195,18 +2195,19 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, | |||
2195 | * Handle a cap GRANT message from the MDS. (Note that a GRANT may | 2195 | * Handle a cap GRANT message from the MDS. (Note that a GRANT may |
2196 | * actually be a revocation if it specifies a smaller cap set.) | 2196 | * actually be a revocation if it specifies a smaller cap set.) |
2197 | * | 2197 | * |
2198 | * caller holds s_mutex. | 2198 | * caller holds s_mutex and i_lock, we drop both. |
2199 | * | ||
2199 | * return value: | 2200 | * return value: |
2200 | * 0 - ok | 2201 | * 0 - ok |
2201 | * 1 - check_caps on auth cap only (writeback) | 2202 | * 1 - check_caps on auth cap only (writeback) |
2202 | * 2 - check_caps (ack revoke) | 2203 | * 2 - check_caps (ack revoke) |
2203 | */ | 2204 | */ |
2204 | static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | 2205 | static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, |
2205 | struct ceph_mds_session *session, | 2206 | struct ceph_mds_session *session, |
2206 | struct ceph_cap *cap, | 2207 | struct ceph_cap *cap, |
2207 | struct ceph_buffer *xattr_buf) | 2208 | struct ceph_buffer *xattr_buf) |
2208 | __releases(inode->i_lock) | 2209 | __releases(inode->i_lock) |
2209 | 2210 | __releases(session->s_mutex) | |
2210 | { | 2211 | { |
2211 | struct ceph_inode_info *ci = ceph_inode(inode); | 2212 | struct ceph_inode_info *ci = ceph_inode(inode); |
2212 | int mds = session->s_mds; | 2213 | int mds = session->s_mds; |
@@ -2216,7 +2217,7 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2216 | u64 size = le64_to_cpu(grant->size); | 2217 | u64 size = le64_to_cpu(grant->size); |
2217 | u64 max_size = le64_to_cpu(grant->max_size); | 2218 | u64 max_size = le64_to_cpu(grant->max_size); |
2218 | struct timespec mtime, atime, ctime; | 2219 | struct timespec mtime, atime, ctime; |
2219 | int reply = 0; | 2220 | int check_caps = 0; |
2220 | int wake = 0; | 2221 | int wake = 0; |
2221 | int writeback = 0; | 2222 | int writeback = 0; |
2222 | int revoked_rdcache = 0; | 2223 | int revoked_rdcache = 0; |
@@ -2329,11 +2330,12 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2329 | if ((used & ~newcaps) & CEPH_CAP_FILE_BUFFER) | 2330 | if ((used & ~newcaps) & CEPH_CAP_FILE_BUFFER) |
2330 | writeback = 1; /* will delay ack */ | 2331 | writeback = 1; /* will delay ack */ |
2331 | else if (dirty & ~newcaps) | 2332 | else if (dirty & ~newcaps) |
2332 | reply = 1; /* initiate writeback in check_caps */ | 2333 | check_caps = 1; /* initiate writeback in check_caps */ |
2333 | else if (((used & ~newcaps) & CEPH_CAP_FILE_CACHE) == 0 || | 2334 | else if (((used & ~newcaps) & CEPH_CAP_FILE_CACHE) == 0 || |
2334 | revoked_rdcache) | 2335 | revoked_rdcache) |
2335 | reply = 2; /* send revoke ack in check_caps */ | 2336 | check_caps = 2; /* send revoke ack in check_caps */ |
2336 | cap->issued = newcaps; | 2337 | cap->issued = newcaps; |
2338 | cap->implemented |= newcaps; | ||
2337 | } else if (cap->issued == newcaps) { | 2339 | } else if (cap->issued == newcaps) { |
2338 | dout("caps unchanged: %s -> %s\n", | 2340 | dout("caps unchanged: %s -> %s\n", |
2339 | ceph_cap_string(cap->issued), ceph_cap_string(newcaps)); | 2341 | ceph_cap_string(cap->issued), ceph_cap_string(newcaps)); |
@@ -2346,6 +2348,7 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2346 | * pending revocation */ | 2348 | * pending revocation */ |
2347 | wake = 1; | 2349 | wake = 1; |
2348 | } | 2350 | } |
2351 | BUG_ON(cap->issued & ~cap->implemented); | ||
2349 | 2352 | ||
2350 | spin_unlock(&inode->i_lock); | 2353 | spin_unlock(&inode->i_lock); |
2351 | if (writeback) | 2354 | if (writeback) |
@@ -2359,7 +2362,14 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2359 | ceph_queue_invalidate(inode); | 2362 | ceph_queue_invalidate(inode); |
2360 | if (wake) | 2363 | if (wake) |
2361 | wake_up(&ci->i_cap_wq); | 2364 | wake_up(&ci->i_cap_wq); |
2362 | return reply; | 2365 | |
2366 | if (check_caps == 1) | ||
2367 | ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY, | ||
2368 | session); | ||
2369 | else if (check_caps == 2) | ||
2370 | ceph_check_caps(ci, CHECK_CAPS_NODELAY, session); | ||
2371 | else | ||
2372 | mutex_unlock(&session->s_mutex); | ||
2363 | } | 2373 | } |
2364 | 2374 | ||
2365 | /* | 2375 | /* |
@@ -2548,9 +2558,8 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, | |||
2548 | ci->i_cap_exporting_issued = cap->issued; | 2558 | ci->i_cap_exporting_issued = cap->issued; |
2549 | } | 2559 | } |
2550 | __ceph_remove_cap(cap); | 2560 | __ceph_remove_cap(cap); |
2551 | } else { | ||
2552 | WARN_ON(!cap); | ||
2553 | } | 2561 | } |
2562 | /* else, we already released it */ | ||
2554 | 2563 | ||
2555 | spin_unlock(&inode->i_lock); | 2564 | spin_unlock(&inode->i_lock); |
2556 | } | 2565 | } |
@@ -2621,9 +2630,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2621 | u64 cap_id; | 2630 | u64 cap_id; |
2622 | u64 size, max_size; | 2631 | u64 size, max_size; |
2623 | u64 tid; | 2632 | u64 tid; |
2624 | int check_caps = 0; | ||
2625 | void *snaptrace; | 2633 | void *snaptrace; |
2626 | int r; | ||
2627 | 2634 | ||
2628 | dout("handle_caps from mds%d\n", mds); | 2635 | dout("handle_caps from mds%d\n", mds); |
2629 | 2636 | ||
@@ -2668,8 +2675,9 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2668 | case CEPH_CAP_OP_IMPORT: | 2675 | case CEPH_CAP_OP_IMPORT: |
2669 | handle_cap_import(mdsc, inode, h, session, | 2676 | handle_cap_import(mdsc, inode, h, session, |
2670 | snaptrace, le32_to_cpu(h->snap_trace_len)); | 2677 | snaptrace, le32_to_cpu(h->snap_trace_len)); |
2671 | check_caps = 1; /* we may have sent a RELEASE to the old auth */ | 2678 | ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, |
2672 | goto done; | 2679 | session); |
2680 | goto done_unlocked; | ||
2673 | } | 2681 | } |
2674 | 2682 | ||
2675 | /* the rest require a cap */ | 2683 | /* the rest require a cap */ |
@@ -2686,16 +2694,8 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2686 | switch (op) { | 2694 | switch (op) { |
2687 | case CEPH_CAP_OP_REVOKE: | 2695 | case CEPH_CAP_OP_REVOKE: |
2688 | case CEPH_CAP_OP_GRANT: | 2696 | case CEPH_CAP_OP_GRANT: |
2689 | r = handle_cap_grant(inode, h, session, cap, msg->middle); | 2697 | handle_cap_grant(inode, h, session, cap, msg->middle); |
2690 | if (r == 1) | 2698 | goto done_unlocked; |
2691 | ceph_check_caps(ceph_inode(inode), | ||
2692 | CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY, | ||
2693 | session); | ||
2694 | else if (r == 2) | ||
2695 | ceph_check_caps(ceph_inode(inode), | ||
2696 | CHECK_CAPS_NODELAY, | ||
2697 | session); | ||
2698 | break; | ||
2699 | 2699 | ||
2700 | case CEPH_CAP_OP_FLUSH_ACK: | 2700 | case CEPH_CAP_OP_FLUSH_ACK: |
2701 | handle_cap_flush_ack(inode, tid, h, session, cap); | 2701 | handle_cap_flush_ack(inode, tid, h, session, cap); |
@@ -2713,9 +2713,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2713 | 2713 | ||
2714 | done: | 2714 | done: |
2715 | mutex_unlock(&session->s_mutex); | 2715 | mutex_unlock(&session->s_mutex); |
2716 | 2716 | done_unlocked: | |
2717 | if (check_caps) | ||
2718 | ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, NULL); | ||
2719 | if (inode) | 2717 | if (inode) |
2720 | iput(inode); | 2718 | iput(inode); |
2721 | return; | 2719 | return; |
@@ -2838,11 +2836,18 @@ int ceph_encode_inode_release(void **p, struct inode *inode, | |||
2838 | struct ceph_cap *cap; | 2836 | struct ceph_cap *cap; |
2839 | struct ceph_mds_request_release *rel = *p; | 2837 | struct ceph_mds_request_release *rel = *p; |
2840 | int ret = 0; | 2838 | int ret = 0; |
2841 | 2839 | int used = 0; | |
2842 | dout("encode_inode_release %p mds%d drop %s unless %s\n", inode, | ||
2843 | mds, ceph_cap_string(drop), ceph_cap_string(unless)); | ||
2844 | 2840 | ||
2845 | spin_lock(&inode->i_lock); | 2841 | spin_lock(&inode->i_lock); |
2842 | used = __ceph_caps_used(ci); | ||
2843 | |||
2844 | dout("encode_inode_release %p mds%d used %s drop %s unless %s\n", inode, | ||
2845 | mds, ceph_cap_string(used), ceph_cap_string(drop), | ||
2846 | ceph_cap_string(unless)); | ||
2847 | |||
2848 | /* only drop unused caps */ | ||
2849 | drop &= ~used; | ||
2850 | |||
2846 | cap = __get_cap_for_mds(ci, mds); | 2851 | cap = __get_cap_for_mds(ci, mds); |
2847 | if (cap && __cap_is_valid(cap)) { | 2852 | if (cap && __cap_is_valid(cap)) { |
2848 | if (force || | 2853 | if (force || |
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 5107384ee029..8a9116e15b70 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -288,8 +288,10 @@ more: | |||
288 | CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR; | 288 | CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR; |
289 | 289 | ||
290 | /* discard old result, if any */ | 290 | /* discard old result, if any */ |
291 | if (fi->last_readdir) | 291 | if (fi->last_readdir) { |
292 | ceph_mdsc_put_request(fi->last_readdir); | 292 | ceph_mdsc_put_request(fi->last_readdir); |
293 | fi->last_readdir = NULL; | ||
294 | } | ||
293 | 295 | ||
294 | /* requery frag tree, as the frag topology may have changed */ | 296 | /* requery frag tree, as the frag topology may have changed */ |
295 | frag = ceph_choose_frag(ceph_inode(inode), frag, NULL, NULL); | 297 | frag = ceph_choose_frag(ceph_inode(inode), frag, NULL, NULL); |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 7abe1aed819b..aca82d55cc53 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -378,6 +378,22 @@ void ceph_destroy_inode(struct inode *inode) | |||
378 | 378 | ||
379 | ceph_queue_caps_release(inode); | 379 | ceph_queue_caps_release(inode); |
380 | 380 | ||
381 | /* | ||
382 | * we may still have a snap_realm reference if there are stray | ||
383 | * caps in i_cap_exporting_issued or i_snap_caps. | ||
384 | */ | ||
385 | if (ci->i_snap_realm) { | ||
386 | struct ceph_mds_client *mdsc = | ||
387 | &ceph_client(ci->vfs_inode.i_sb)->mdsc; | ||
388 | struct ceph_snap_realm *realm = ci->i_snap_realm; | ||
389 | |||
390 | dout(" dropping residual ref to snap realm %p\n", realm); | ||
391 | spin_lock(&realm->inodes_with_caps_lock); | ||
392 | list_del_init(&ci->i_snap_realm_item); | ||
393 | spin_unlock(&realm->inodes_with_caps_lock); | ||
394 | ceph_put_snap_realm(mdsc, realm); | ||
395 | } | ||
396 | |||
381 | kfree(ci->i_symlink); | 397 | kfree(ci->i_symlink); |
382 | while ((n = rb_first(&ci->i_fragtree)) != NULL) { | 398 | while ((n = rb_first(&ci->i_fragtree)) != NULL) { |
383 | frag = rb_entry(n, struct ceph_inode_frag, node); | 399 | frag = rb_entry(n, struct ceph_inode_frag, node); |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a2600101ec22..5c7920be6420 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -328,6 +328,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, | |||
328 | struct ceph_mds_session *s; | 328 | struct ceph_mds_session *s; |
329 | 329 | ||
330 | s = kzalloc(sizeof(*s), GFP_NOFS); | 330 | s = kzalloc(sizeof(*s), GFP_NOFS); |
331 | if (!s) | ||
332 | return ERR_PTR(-ENOMEM); | ||
331 | s->s_mdsc = mdsc; | 333 | s->s_mdsc = mdsc; |
332 | s->s_mds = mds; | 334 | s->s_mds = mds; |
333 | s->s_state = CEPH_MDS_SESSION_NEW; | 335 | s->s_state = CEPH_MDS_SESSION_NEW; |
@@ -529,7 +531,7 @@ static void __unregister_request(struct ceph_mds_client *mdsc, | |||
529 | { | 531 | { |
530 | dout("__unregister_request %p tid %lld\n", req, req->r_tid); | 532 | dout("__unregister_request %p tid %lld\n", req, req->r_tid); |
531 | rb_erase(&req->r_node, &mdsc->request_tree); | 533 | rb_erase(&req->r_node, &mdsc->request_tree); |
532 | ceph_mdsc_put_request(req); | 534 | RB_CLEAR_NODE(&req->r_node); |
533 | 535 | ||
534 | if (req->r_unsafe_dir) { | 536 | if (req->r_unsafe_dir) { |
535 | struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir); | 537 | struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir); |
@@ -538,6 +540,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc, | |||
538 | list_del_init(&req->r_unsafe_dir_item); | 540 | list_del_init(&req->r_unsafe_dir_item); |
539 | spin_unlock(&ci->i_unsafe_lock); | 541 | spin_unlock(&ci->i_unsafe_lock); |
540 | } | 542 | } |
543 | |||
544 | ceph_mdsc_put_request(req); | ||
541 | } | 545 | } |
542 | 546 | ||
543 | /* | 547 | /* |
@@ -862,6 +866,7 @@ static int send_renew_caps(struct ceph_mds_client *mdsc, | |||
862 | if (time_after_eq(jiffies, session->s_cap_ttl) && | 866 | if (time_after_eq(jiffies, session->s_cap_ttl) && |
863 | time_after_eq(session->s_cap_ttl, session->s_renew_requested)) | 867 | time_after_eq(session->s_cap_ttl, session->s_renew_requested)) |
864 | pr_info("mds%d caps stale\n", session->s_mds); | 868 | pr_info("mds%d caps stale\n", session->s_mds); |
869 | session->s_renew_requested = jiffies; | ||
865 | 870 | ||
866 | /* do not try to renew caps until a recovering mds has reconnected | 871 | /* do not try to renew caps until a recovering mds has reconnected |
867 | * with its clients. */ | 872 | * with its clients. */ |
@@ -874,7 +879,6 @@ static int send_renew_caps(struct ceph_mds_client *mdsc, | |||
874 | 879 | ||
875 | dout("send_renew_caps to mds%d (%s)\n", session->s_mds, | 880 | dout("send_renew_caps to mds%d (%s)\n", session->s_mds, |
876 | ceph_mds_state_name(state)); | 881 | ceph_mds_state_name(state)); |
877 | session->s_renew_requested = jiffies; | ||
878 | msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, | 882 | msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, |
879 | ++session->s_renew_seq); | 883 | ++session->s_renew_seq); |
880 | if (IS_ERR(msg)) | 884 | if (IS_ERR(msg)) |
@@ -1566,8 +1570,13 @@ static int __do_request(struct ceph_mds_client *mdsc, | |||
1566 | 1570 | ||
1567 | /* get, open session */ | 1571 | /* get, open session */ |
1568 | session = __ceph_lookup_mds_session(mdsc, mds); | 1572 | session = __ceph_lookup_mds_session(mdsc, mds); |
1569 | if (!session) | 1573 | if (!session) { |
1570 | session = register_session(mdsc, mds); | 1574 | session = register_session(mdsc, mds); |
1575 | if (IS_ERR(session)) { | ||
1576 | err = PTR_ERR(session); | ||
1577 | goto finish; | ||
1578 | } | ||
1579 | } | ||
1571 | dout("do_request mds%d session %p state %s\n", mds, session, | 1580 | dout("do_request mds%d session %p state %s\n", mds, session, |
1572 | session_state_name(session->s_state)); | 1581 | session_state_name(session->s_state)); |
1573 | if (session->s_state != CEPH_MDS_SESSION_OPEN && | 1582 | if (session->s_state != CEPH_MDS_SESSION_OPEN && |
@@ -1770,7 +1779,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
1770 | dout("handle_reply %p\n", req); | 1779 | dout("handle_reply %p\n", req); |
1771 | 1780 | ||
1772 | /* correct session? */ | 1781 | /* correct session? */ |
1773 | if (!req->r_session && req->r_session != session) { | 1782 | if (req->r_session != session) { |
1774 | pr_err("mdsc_handle_reply got %llu on session mds%d" | 1783 | pr_err("mdsc_handle_reply got %llu on session mds%d" |
1775 | " not mds%d\n", tid, session->s_mds, | 1784 | " not mds%d\n", tid, session->s_mds, |
1776 | req->r_session ? req->r_session->s_mds : -1); | 1785 | req->r_session ? req->r_session->s_mds : -1); |
@@ -2682,29 +2691,41 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) | |||
2682 | */ | 2691 | */ |
2683 | static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid) | 2692 | static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid) |
2684 | { | 2693 | { |
2685 | struct ceph_mds_request *req = NULL; | 2694 | struct ceph_mds_request *req = NULL, *nextreq; |
2686 | struct rb_node *n; | 2695 | struct rb_node *n; |
2687 | 2696 | ||
2688 | mutex_lock(&mdsc->mutex); | 2697 | mutex_lock(&mdsc->mutex); |
2689 | dout("wait_unsafe_requests want %lld\n", want_tid); | 2698 | dout("wait_unsafe_requests want %lld\n", want_tid); |
2699 | restart: | ||
2690 | req = __get_oldest_req(mdsc); | 2700 | req = __get_oldest_req(mdsc); |
2691 | while (req && req->r_tid <= want_tid) { | 2701 | while (req && req->r_tid <= want_tid) { |
2702 | /* find next request */ | ||
2703 | n = rb_next(&req->r_node); | ||
2704 | if (n) | ||
2705 | nextreq = rb_entry(n, struct ceph_mds_request, r_node); | ||
2706 | else | ||
2707 | nextreq = NULL; | ||
2692 | if ((req->r_op & CEPH_MDS_OP_WRITE)) { | 2708 | if ((req->r_op & CEPH_MDS_OP_WRITE)) { |
2693 | /* write op */ | 2709 | /* write op */ |
2694 | ceph_mdsc_get_request(req); | 2710 | ceph_mdsc_get_request(req); |
2711 | if (nextreq) | ||
2712 | ceph_mdsc_get_request(nextreq); | ||
2695 | mutex_unlock(&mdsc->mutex); | 2713 | mutex_unlock(&mdsc->mutex); |
2696 | dout("wait_unsafe_requests wait on %llu (want %llu)\n", | 2714 | dout("wait_unsafe_requests wait on %llu (want %llu)\n", |
2697 | req->r_tid, want_tid); | 2715 | req->r_tid, want_tid); |
2698 | wait_for_completion(&req->r_safe_completion); | 2716 | wait_for_completion(&req->r_safe_completion); |
2699 | mutex_lock(&mdsc->mutex); | 2717 | mutex_lock(&mdsc->mutex); |
2700 | n = rb_next(&req->r_node); | ||
2701 | ceph_mdsc_put_request(req); | 2718 | ceph_mdsc_put_request(req); |
2702 | } else { | 2719 | if (!nextreq) |
2703 | n = rb_next(&req->r_node); | 2720 | break; /* next dne before, so we're done! */ |
2721 | if (RB_EMPTY_NODE(&nextreq->r_node)) { | ||
2722 | /* next request was removed from tree */ | ||
2723 | ceph_mdsc_put_request(nextreq); | ||
2724 | goto restart; | ||
2725 | } | ||
2726 | ceph_mdsc_put_request(nextreq); /* won't go away */ | ||
2704 | } | 2727 | } |
2705 | if (!n) | 2728 | req = nextreq; |
2706 | break; | ||
2707 | req = rb_entry(n, struct ceph_mds_request, r_node); | ||
2708 | } | 2729 | } |
2709 | mutex_unlock(&mdsc->mutex); | 2730 | mutex_unlock(&mdsc->mutex); |
2710 | dout("wait_unsafe_requests done\n"); | 2731 | dout("wait_unsafe_requests done\n"); |
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 781656a49bf8..a32f0f896d9f 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c | |||
@@ -366,6 +366,14 @@ void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr) | |||
366 | } | 366 | } |
367 | 367 | ||
368 | /* | 368 | /* |
369 | * return true if this connection ever successfully opened | ||
370 | */ | ||
371 | bool ceph_con_opened(struct ceph_connection *con) | ||
372 | { | ||
373 | return con->connect_seq > 0; | ||
374 | } | ||
375 | |||
376 | /* | ||
369 | * generic get/put | 377 | * generic get/put |
370 | */ | 378 | */ |
371 | struct ceph_connection *ceph_con_get(struct ceph_connection *con) | 379 | struct ceph_connection *ceph_con_get(struct ceph_connection *con) |
@@ -830,13 +838,6 @@ static void prepare_read_connect(struct ceph_connection *con) | |||
830 | con->in_base_pos = 0; | 838 | con->in_base_pos = 0; |
831 | } | 839 | } |
832 | 840 | ||
833 | static void prepare_read_connect_retry(struct ceph_connection *con) | ||
834 | { | ||
835 | dout("prepare_read_connect_retry %p\n", con); | ||
836 | con->in_base_pos = strlen(CEPH_BANNER) + sizeof(con->actual_peer_addr) | ||
837 | + sizeof(con->peer_addr_for_me); | ||
838 | } | ||
839 | |||
840 | static void prepare_read_ack(struct ceph_connection *con) | 841 | static void prepare_read_ack(struct ceph_connection *con) |
841 | { | 842 | { |
842 | dout("prepare_read_ack %p\n", con); | 843 | dout("prepare_read_ack %p\n", con); |
@@ -1146,7 +1147,7 @@ static int process_connect(struct ceph_connection *con) | |||
1146 | } | 1147 | } |
1147 | con->auth_retry = 1; | 1148 | con->auth_retry = 1; |
1148 | prepare_write_connect(con->msgr, con, 0); | 1149 | prepare_write_connect(con->msgr, con, 0); |
1149 | prepare_read_connect_retry(con); | 1150 | prepare_read_connect(con); |
1150 | break; | 1151 | break; |
1151 | 1152 | ||
1152 | case CEPH_MSGR_TAG_RESETSESSION: | 1153 | case CEPH_MSGR_TAG_RESETSESSION: |
@@ -1843,8 +1844,6 @@ static void ceph_fault(struct ceph_connection *con) | |||
1843 | goto out; | 1844 | goto out; |
1844 | } | 1845 | } |
1845 | 1846 | ||
1846 | clear_bit(BUSY, &con->state); /* to avoid an improbable race */ | ||
1847 | |||
1848 | mutex_lock(&con->mutex); | 1847 | mutex_lock(&con->mutex); |
1849 | if (test_bit(CLOSED, &con->state)) | 1848 | if (test_bit(CLOSED, &con->state)) |
1850 | goto out_unlock; | 1849 | goto out_unlock; |
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h index 4caaa5911110..a343dae73cdc 100644 --- a/fs/ceph/messenger.h +++ b/fs/ceph/messenger.h | |||
@@ -223,6 +223,7 @@ extern void ceph_con_init(struct ceph_messenger *msgr, | |||
223 | struct ceph_connection *con); | 223 | struct ceph_connection *con); |
224 | extern void ceph_con_open(struct ceph_connection *con, | 224 | extern void ceph_con_open(struct ceph_connection *con, |
225 | struct ceph_entity_addr *addr); | 225 | struct ceph_entity_addr *addr); |
226 | extern bool ceph_con_opened(struct ceph_connection *con); | ||
226 | extern void ceph_con_close(struct ceph_connection *con); | 227 | extern void ceph_con_close(struct ceph_connection *con); |
227 | extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg); | 228 | extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg); |
228 | extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg); | 229 | extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg); |
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index dbe63db9762f..c7b4dedaace6 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c | |||
@@ -413,11 +413,22 @@ static void remove_old_osds(struct ceph_osd_client *osdc, int remove_all) | |||
413 | */ | 413 | */ |
414 | static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) | 414 | static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) |
415 | { | 415 | { |
416 | struct ceph_osd_request *req; | ||
416 | int ret = 0; | 417 | int ret = 0; |
417 | 418 | ||
418 | dout("__reset_osd %p osd%d\n", osd, osd->o_osd); | 419 | dout("__reset_osd %p osd%d\n", osd, osd->o_osd); |
419 | if (list_empty(&osd->o_requests)) { | 420 | if (list_empty(&osd->o_requests)) { |
420 | __remove_osd(osdc, osd); | 421 | __remove_osd(osdc, osd); |
422 | } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd], | ||
423 | &osd->o_con.peer_addr, | ||
424 | sizeof(osd->o_con.peer_addr)) == 0 && | ||
425 | !ceph_con_opened(&osd->o_con)) { | ||
426 | dout(" osd addr hasn't changed and connection never opened," | ||
427 | " letting msgr retry"); | ||
428 | /* touch each r_stamp for handle_timeout()'s benfit */ | ||
429 | list_for_each_entry(req, &osd->o_requests, r_osd_item) | ||
430 | req->r_stamp = jiffies; | ||
431 | ret = -EAGAIN; | ||
421 | } else { | 432 | } else { |
422 | ceph_con_close(&osd->o_con); | 433 | ceph_con_close(&osd->o_con); |
423 | ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]); | 434 | ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]); |
@@ -633,7 +644,7 @@ static int __send_request(struct ceph_osd_client *osdc, | |||
633 | reqhead->flags |= cpu_to_le32(req->r_flags); /* e.g., RETRY */ | 644 | reqhead->flags |= cpu_to_le32(req->r_flags); /* e.g., RETRY */ |
634 | reqhead->reassert_version = req->r_reassert_version; | 645 | reqhead->reassert_version = req->r_reassert_version; |
635 | 646 | ||
636 | req->r_sent_stamp = jiffies; | 647 | req->r_stamp = jiffies; |
637 | list_move_tail(&osdc->req_lru, &req->r_req_lru_item); | 648 | list_move_tail(&osdc->req_lru, &req->r_req_lru_item); |
638 | 649 | ||
639 | ceph_msg_get(req->r_request); /* send consumes a ref */ | 650 | ceph_msg_get(req->r_request); /* send consumes a ref */ |
@@ -660,7 +671,7 @@ static void handle_timeout(struct work_struct *work) | |||
660 | unsigned long timeout = osdc->client->mount_args->osd_timeout * HZ; | 671 | unsigned long timeout = osdc->client->mount_args->osd_timeout * HZ; |
661 | unsigned long keepalive = | 672 | unsigned long keepalive = |
662 | osdc->client->mount_args->osd_keepalive_timeout * HZ; | 673 | osdc->client->mount_args->osd_keepalive_timeout * HZ; |
663 | unsigned long last_sent = 0; | 674 | unsigned long last_stamp = 0; |
664 | struct rb_node *p; | 675 | struct rb_node *p; |
665 | struct list_head slow_osds; | 676 | struct list_head slow_osds; |
666 | 677 | ||
@@ -697,12 +708,12 @@ static void handle_timeout(struct work_struct *work) | |||
697 | req = list_entry(osdc->req_lru.next, struct ceph_osd_request, | 708 | req = list_entry(osdc->req_lru.next, struct ceph_osd_request, |
698 | r_req_lru_item); | 709 | r_req_lru_item); |
699 | 710 | ||
700 | if (time_before(jiffies, req->r_sent_stamp + timeout)) | 711 | if (time_before(jiffies, req->r_stamp + timeout)) |
701 | break; | 712 | break; |
702 | 713 | ||
703 | BUG_ON(req == last_req && req->r_sent_stamp == last_sent); | 714 | BUG_ON(req == last_req && req->r_stamp == last_stamp); |
704 | last_req = req; | 715 | last_req = req; |
705 | last_sent = req->r_sent_stamp; | 716 | last_stamp = req->r_stamp; |
706 | 717 | ||
707 | osd = req->r_osd; | 718 | osd = req->r_osd; |
708 | BUG_ON(!osd); | 719 | BUG_ON(!osd); |
@@ -718,7 +729,7 @@ static void handle_timeout(struct work_struct *work) | |||
718 | */ | 729 | */ |
719 | INIT_LIST_HEAD(&slow_osds); | 730 | INIT_LIST_HEAD(&slow_osds); |
720 | list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) { | 731 | list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) { |
721 | if (time_before(jiffies, req->r_sent_stamp + keepalive)) | 732 | if (time_before(jiffies, req->r_stamp + keepalive)) |
722 | break; | 733 | break; |
723 | 734 | ||
724 | osd = req->r_osd; | 735 | osd = req->r_osd; |
@@ -862,7 +873,9 @@ static int __kick_requests(struct ceph_osd_client *osdc, | |||
862 | 873 | ||
863 | dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1); | 874 | dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1); |
864 | if (kickosd) { | 875 | if (kickosd) { |
865 | __reset_osd(osdc, kickosd); | 876 | err = __reset_osd(osdc, kickosd); |
877 | if (err == -EAGAIN) | ||
878 | return 1; | ||
866 | } else { | 879 | } else { |
867 | for (p = rb_first(&osdc->osds); p; p = n) { | 880 | for (p = rb_first(&osdc->osds); p; p = n) { |
868 | struct ceph_osd *osd = | 881 | struct ceph_osd *osd = |
@@ -913,7 +926,7 @@ static int __kick_requests(struct ceph_osd_client *osdc, | |||
913 | 926 | ||
914 | kick: | 927 | kick: |
915 | dout("kicking %p tid %llu osd%d\n", req, req->r_tid, | 928 | dout("kicking %p tid %llu osd%d\n", req, req->r_tid, |
916 | req->r_osd->o_osd); | 929 | req->r_osd ? req->r_osd->o_osd : -1); |
917 | req->r_flags |= CEPH_OSD_FLAG_RETRY; | 930 | req->r_flags |= CEPH_OSD_FLAG_RETRY; |
918 | err = __send_request(osdc, req); | 931 | err = __send_request(osdc, req); |
919 | if (err) { | 932 | if (err) { |
diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h index 1b1a3ca43afc..b0759911e7c3 100644 --- a/fs/ceph/osd_client.h +++ b/fs/ceph/osd_client.h | |||
@@ -70,7 +70,7 @@ struct ceph_osd_request { | |||
70 | 70 | ||
71 | char r_oid[40]; /* object name */ | 71 | char r_oid[40]; /* object name */ |
72 | int r_oid_len; | 72 | int r_oid_len; |
73 | unsigned long r_sent_stamp; | 73 | unsigned long r_stamp; /* send OR check time */ |
74 | bool r_resend; /* msg send failed, needs retry */ | 74 | bool r_resend; /* msg send failed, needs retry */ |
75 | 75 | ||
76 | struct ceph_file_layout r_file_layout; | 76 | struct ceph_file_layout r_file_layout; |
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index b83f2692b835..d82fe87c2a6e 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c | |||
@@ -480,6 +480,14 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) | |||
480 | return NULL; | 480 | return NULL; |
481 | } | 481 | } |
482 | 482 | ||
483 | void __decode_pool(void **p, struct ceph_pg_pool_info *pi) | ||
484 | { | ||
485 | ceph_decode_copy(p, &pi->v, sizeof(pi->v)); | ||
486 | calc_pg_masks(pi); | ||
487 | *p += le32_to_cpu(pi->v.num_snaps) * sizeof(u64); | ||
488 | *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; | ||
489 | } | ||
490 | |||
483 | /* | 491 | /* |
484 | * decode a full map. | 492 | * decode a full map. |
485 | */ | 493 | */ |
@@ -526,12 +534,8 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) | |||
526 | ev, CEPH_PG_POOL_VERSION); | 534 | ev, CEPH_PG_POOL_VERSION); |
527 | goto bad; | 535 | goto bad; |
528 | } | 536 | } |
529 | ceph_decode_copy(p, &pi->v, sizeof(pi->v)); | 537 | __decode_pool(p, pi); |
530 | __insert_pg_pool(&map->pg_pools, pi); | 538 | __insert_pg_pool(&map->pg_pools, pi); |
531 | calc_pg_masks(pi); | ||
532 | *p += le32_to_cpu(pi->v.num_snaps) * sizeof(u64); | ||
533 | *p += le32_to_cpu(pi->v.num_removed_snap_intervals) | ||
534 | * sizeof(u64) * 2; | ||
535 | } | 539 | } |
536 | ceph_decode_32_safe(p, end, map->pool_max, bad); | 540 | ceph_decode_32_safe(p, end, map->pool_max, bad); |
537 | 541 | ||
@@ -714,8 +718,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
714 | pi->id = pool; | 718 | pi->id = pool; |
715 | __insert_pg_pool(&map->pg_pools, pi); | 719 | __insert_pg_pool(&map->pg_pools, pi); |
716 | } | 720 | } |
717 | ceph_decode_copy(p, &pi->v, sizeof(pi->v)); | 721 | __decode_pool(p, pi); |
718 | calc_pg_masks(pi); | ||
719 | } | 722 | } |
720 | 723 | ||
721 | /* old_pool */ | 724 | /* old_pool */ |
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index bf2a5f3846a4..df04e210a055 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c | |||
@@ -314,9 +314,9 @@ static int build_snap_context(struct ceph_snap_realm *realm) | |||
314 | because we rebuild_snap_realms() works _downward_ in | 314 | because we rebuild_snap_realms() works _downward_ in |
315 | hierarchy after each update.) */ | 315 | hierarchy after each update.) */ |
316 | if (realm->cached_context && | 316 | if (realm->cached_context && |
317 | realm->cached_context->seq <= realm->seq && | 317 | realm->cached_context->seq == realm->seq && |
318 | (!parent || | 318 | (!parent || |
319 | realm->cached_context->seq <= parent->cached_context->seq)) { | 319 | realm->cached_context->seq >= parent->cached_context->seq)) { |
320 | dout("build_snap_context %llx %p: %p seq %lld (%d snaps)" | 320 | dout("build_snap_context %llx %p: %p seq %lld (%d snaps)" |
321 | " (unchanged)\n", | 321 | " (unchanged)\n", |
322 | realm->ino, realm, realm->cached_context, | 322 | realm->ino, realm, realm->cached_context, |
@@ -818,7 +818,9 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, | |||
818 | * queued (again) by ceph_update_snap_trace() | 818 | * queued (again) by ceph_update_snap_trace() |
819 | * below. Queue it _now_, under the old context. | 819 | * below. Queue it _now_, under the old context. |
820 | */ | 820 | */ |
821 | spin_lock(&realm->inodes_with_caps_lock); | ||
821 | list_del_init(&ci->i_snap_realm_item); | 822 | list_del_init(&ci->i_snap_realm_item); |
823 | spin_unlock(&realm->inodes_with_caps_lock); | ||
822 | spin_unlock(&inode->i_lock); | 824 | spin_unlock(&inode->i_lock); |
823 | 825 | ||
824 | ceph_queue_cap_snap(ci, | 826 | ceph_queue_cap_snap(ci, |