diff options
Diffstat (limited to 'fs')
38 files changed, 529 insertions, 268 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 23bb0ceabe31..ce8ef6107727 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -919,6 +919,10 @@ static int context_is_writeable_or_written(struct inode *inode, | |||
919 | /* | 919 | /* |
920 | * We are only allowed to write into/dirty the page if the page is | 920 | * We are only allowed to write into/dirty the page if the page is |
921 | * clean, or already dirty within the same snap context. | 921 | * clean, or already dirty within the same snap context. |
922 | * | ||
923 | * called with page locked. | ||
924 | * return success with page locked, | ||
925 | * or any failure (incl -EAGAIN) with page unlocked. | ||
922 | */ | 926 | */ |
923 | static int ceph_update_writeable_page(struct file *file, | 927 | static int ceph_update_writeable_page(struct file *file, |
924 | loff_t pos, unsigned len, | 928 | loff_t pos, unsigned len, |
@@ -961,9 +965,11 @@ retry_locked: | |||
961 | snapc = ceph_get_snap_context((void *)page->private); | 965 | snapc = ceph_get_snap_context((void *)page->private); |
962 | unlock_page(page); | 966 | unlock_page(page); |
963 | ceph_queue_writeback(inode); | 967 | ceph_queue_writeback(inode); |
964 | wait_event_interruptible(ci->i_cap_wq, | 968 | r = wait_event_interruptible(ci->i_cap_wq, |
965 | context_is_writeable_or_written(inode, snapc)); | 969 | context_is_writeable_or_written(inode, snapc)); |
966 | ceph_put_snap_context(snapc); | 970 | ceph_put_snap_context(snapc); |
971 | if (r == -ERESTARTSYS) | ||
972 | return r; | ||
967 | return -EAGAIN; | 973 | return -EAGAIN; |
968 | } | 974 | } |
969 | 975 | ||
@@ -1035,7 +1041,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, | |||
1035 | int r; | 1041 | int r; |
1036 | 1042 | ||
1037 | do { | 1043 | do { |
1038 | /* get a page*/ | 1044 | /* get a page */ |
1039 | page = grab_cache_page_write_begin(mapping, index, 0); | 1045 | page = grab_cache_page_write_begin(mapping, index, 0); |
1040 | if (!page) | 1046 | if (!page) |
1041 | return -ENOMEM; | 1047 | return -ENOMEM; |
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index f0318427b6da..8d8a84964763 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c | |||
@@ -28,6 +28,12 @@ static int ceph_x_is_authenticated(struct ceph_auth_client *ac) | |||
28 | return (ac->want_keys & xi->have_keys) == ac->want_keys; | 28 | return (ac->want_keys & xi->have_keys) == ac->want_keys; |
29 | } | 29 | } |
30 | 30 | ||
31 | static int ceph_x_encrypt_buflen(int ilen) | ||
32 | { | ||
33 | return sizeof(struct ceph_x_encrypt_header) + ilen + 16 + | ||
34 | sizeof(u32); | ||
35 | } | ||
36 | |||
31 | static int ceph_x_encrypt(struct ceph_crypto_key *secret, | 37 | static int ceph_x_encrypt(struct ceph_crypto_key *secret, |
32 | void *ibuf, int ilen, void *obuf, size_t olen) | 38 | void *ibuf, int ilen, void *obuf, size_t olen) |
33 | { | 39 | { |
@@ -150,6 +156,11 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, | |||
150 | struct timespec validity; | 156 | struct timespec validity; |
151 | struct ceph_crypto_key old_key; | 157 | struct ceph_crypto_key old_key; |
152 | void *tp, *tpend; | 158 | void *tp, *tpend; |
159 | struct ceph_timespec new_validity; | ||
160 | struct ceph_crypto_key new_session_key; | ||
161 | struct ceph_buffer *new_ticket_blob; | ||
162 | unsigned long new_expires, new_renew_after; | ||
163 | u64 new_secret_id; | ||
153 | 164 | ||
154 | ceph_decode_need(&p, end, sizeof(u32) + 1, bad); | 165 | ceph_decode_need(&p, end, sizeof(u32) + 1, bad); |
155 | 166 | ||
@@ -182,16 +193,16 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, | |||
182 | goto bad; | 193 | goto bad; |
183 | 194 | ||
184 | memcpy(&old_key, &th->session_key, sizeof(old_key)); | 195 | memcpy(&old_key, &th->session_key, sizeof(old_key)); |
185 | ret = ceph_crypto_key_decode(&th->session_key, &dp, dend); | 196 | ret = ceph_crypto_key_decode(&new_session_key, &dp, dend); |
186 | if (ret) | 197 | if (ret) |
187 | goto out; | 198 | goto out; |
188 | 199 | ||
189 | ceph_decode_copy(&dp, &th->validity, sizeof(th->validity)); | 200 | ceph_decode_copy(&dp, &new_validity, sizeof(new_validity)); |
190 | ceph_decode_timespec(&validity, &th->validity); | 201 | ceph_decode_timespec(&validity, &new_validity); |
191 | th->expires = get_seconds() + validity.tv_sec; | 202 | new_expires = get_seconds() + validity.tv_sec; |
192 | th->renew_after = th->expires - (validity.tv_sec / 4); | 203 | new_renew_after = new_expires - (validity.tv_sec / 4); |
193 | dout(" expires=%lu renew_after=%lu\n", th->expires, | 204 | dout(" expires=%lu renew_after=%lu\n", new_expires, |
194 | th->renew_after); | 205 | new_renew_after); |
195 | 206 | ||
196 | /* ticket blob for service */ | 207 | /* ticket blob for service */ |
197 | ceph_decode_8_safe(&p, end, is_enc, bad); | 208 | ceph_decode_8_safe(&p, end, is_enc, bad); |
@@ -216,10 +227,21 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, | |||
216 | dout(" ticket blob is %d bytes\n", dlen); | 227 | dout(" ticket blob is %d bytes\n", dlen); |
217 | ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); | 228 | ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); |
218 | struct_v = ceph_decode_8(&tp); | 229 | struct_v = ceph_decode_8(&tp); |
219 | th->secret_id = ceph_decode_64(&tp); | 230 | new_secret_id = ceph_decode_64(&tp); |
220 | ret = ceph_decode_buffer(&th->ticket_blob, &tp, tpend); | 231 | ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend); |
221 | if (ret) | 232 | if (ret) |
222 | goto out; | 233 | goto out; |
234 | |||
235 | /* all is well, update our ticket */ | ||
236 | ceph_crypto_key_destroy(&th->session_key); | ||
237 | if (th->ticket_blob) | ||
238 | ceph_buffer_put(th->ticket_blob); | ||
239 | th->session_key = new_session_key; | ||
240 | th->ticket_blob = new_ticket_blob; | ||
241 | th->validity = new_validity; | ||
242 | th->secret_id = new_secret_id; | ||
243 | th->expires = new_expires; | ||
244 | th->renew_after = new_renew_after; | ||
223 | dout(" got ticket service %d (%s) secret_id %lld len %d\n", | 245 | dout(" got ticket service %d (%s) secret_id %lld len %d\n", |
224 | type, ceph_entity_type_name(type), th->secret_id, | 246 | type, ceph_entity_type_name(type), th->secret_id, |
225 | (int)th->ticket_blob->vec.iov_len); | 247 | (int)th->ticket_blob->vec.iov_len); |
@@ -242,7 +264,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, | |||
242 | struct ceph_x_ticket_handler *th, | 264 | struct ceph_x_ticket_handler *th, |
243 | struct ceph_x_authorizer *au) | 265 | struct ceph_x_authorizer *au) |
244 | { | 266 | { |
245 | int len; | 267 | int maxlen; |
246 | struct ceph_x_authorize_a *msg_a; | 268 | struct ceph_x_authorize_a *msg_a; |
247 | struct ceph_x_authorize_b msg_b; | 269 | struct ceph_x_authorize_b msg_b; |
248 | void *p, *end; | 270 | void *p, *end; |
@@ -253,15 +275,15 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, | |||
253 | dout("build_authorizer for %s %p\n", | 275 | dout("build_authorizer for %s %p\n", |
254 | ceph_entity_type_name(th->service), au); | 276 | ceph_entity_type_name(th->service), au); |
255 | 277 | ||
256 | len = sizeof(*msg_a) + sizeof(msg_b) + sizeof(u32) + | 278 | maxlen = sizeof(*msg_a) + sizeof(msg_b) + |
257 | ticket_blob_len + 16; | 279 | ceph_x_encrypt_buflen(ticket_blob_len); |
258 | dout(" need len %d\n", len); | 280 | dout(" need len %d\n", maxlen); |
259 | if (au->buf && au->buf->alloc_len < len) { | 281 | if (au->buf && au->buf->alloc_len < maxlen) { |
260 | ceph_buffer_put(au->buf); | 282 | ceph_buffer_put(au->buf); |
261 | au->buf = NULL; | 283 | au->buf = NULL; |
262 | } | 284 | } |
263 | if (!au->buf) { | 285 | if (!au->buf) { |
264 | au->buf = ceph_buffer_new(len, GFP_NOFS); | 286 | au->buf = ceph_buffer_new(maxlen, GFP_NOFS); |
265 | if (!au->buf) | 287 | if (!au->buf) |
266 | return -ENOMEM; | 288 | return -ENOMEM; |
267 | } | 289 | } |
@@ -296,6 +318,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, | |||
296 | au->buf->vec.iov_len = p - au->buf->vec.iov_base; | 318 | au->buf->vec.iov_len = p - au->buf->vec.iov_base; |
297 | dout(" built authorizer nonce %llx len %d\n", au->nonce, | 319 | dout(" built authorizer nonce %llx len %d\n", au->nonce, |
298 | (int)au->buf->vec.iov_len); | 320 | (int)au->buf->vec.iov_len); |
321 | BUG_ON(au->buf->vec.iov_len > maxlen); | ||
299 | return 0; | 322 | return 0; |
300 | 323 | ||
301 | out_buf: | 324 | out_buf: |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index db122bb357b8..7d0a0d0adc18 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -1407,6 +1407,7 @@ static int try_nonblocking_invalidate(struct inode *inode) | |||
1407 | */ | 1407 | */ |
1408 | void ceph_check_caps(struct ceph_inode_info *ci, int flags, | 1408 | void ceph_check_caps(struct ceph_inode_info *ci, int flags, |
1409 | struct ceph_mds_session *session) | 1409 | struct ceph_mds_session *session) |
1410 | __releases(session->s_mutex) | ||
1410 | { | 1411 | { |
1411 | struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); | 1412 | struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); |
1412 | struct ceph_mds_client *mdsc = &client->mdsc; | 1413 | struct ceph_mds_client *mdsc = &client->mdsc; |
@@ -1414,7 +1415,6 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, | |||
1414 | struct ceph_cap *cap; | 1415 | struct ceph_cap *cap; |
1415 | int file_wanted, used; | 1416 | int file_wanted, used; |
1416 | int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */ | 1417 | int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */ |
1417 | int drop_session_lock = session ? 0 : 1; | ||
1418 | int issued, implemented, want, retain, revoking, flushing = 0; | 1418 | int issued, implemented, want, retain, revoking, flushing = 0; |
1419 | int mds = -1; /* keep track of how far we've gone through i_caps list | 1419 | int mds = -1; /* keep track of how far we've gone through i_caps list |
1420 | to avoid an infinite loop on retry */ | 1420 | to avoid an infinite loop on retry */ |
@@ -1639,7 +1639,7 @@ ack: | |||
1639 | if (queue_invalidate) | 1639 | if (queue_invalidate) |
1640 | ceph_queue_invalidate(inode); | 1640 | ceph_queue_invalidate(inode); |
1641 | 1641 | ||
1642 | if (session && drop_session_lock) | 1642 | if (session) |
1643 | mutex_unlock(&session->s_mutex); | 1643 | mutex_unlock(&session->s_mutex); |
1644 | if (took_snap_rwsem) | 1644 | if (took_snap_rwsem) |
1645 | up_read(&mdsc->snap_rwsem); | 1645 | up_read(&mdsc->snap_rwsem); |
@@ -2195,18 +2195,19 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, | |||
2195 | * Handle a cap GRANT message from the MDS. (Note that a GRANT may | 2195 | * Handle a cap GRANT message from the MDS. (Note that a GRANT may |
2196 | * actually be a revocation if it specifies a smaller cap set.) | 2196 | * actually be a revocation if it specifies a smaller cap set.) |
2197 | * | 2197 | * |
2198 | * caller holds s_mutex. | 2198 | * caller holds s_mutex and i_lock, we drop both. |
2199 | * | ||
2199 | * return value: | 2200 | * return value: |
2200 | * 0 - ok | 2201 | * 0 - ok |
2201 | * 1 - check_caps on auth cap only (writeback) | 2202 | * 1 - check_caps on auth cap only (writeback) |
2202 | * 2 - check_caps (ack revoke) | 2203 | * 2 - check_caps (ack revoke) |
2203 | */ | 2204 | */ |
2204 | static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | 2205 | static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, |
2205 | struct ceph_mds_session *session, | 2206 | struct ceph_mds_session *session, |
2206 | struct ceph_cap *cap, | 2207 | struct ceph_cap *cap, |
2207 | struct ceph_buffer *xattr_buf) | 2208 | struct ceph_buffer *xattr_buf) |
2208 | __releases(inode->i_lock) | 2209 | __releases(inode->i_lock) |
2209 | 2210 | __releases(session->s_mutex) | |
2210 | { | 2211 | { |
2211 | struct ceph_inode_info *ci = ceph_inode(inode); | 2212 | struct ceph_inode_info *ci = ceph_inode(inode); |
2212 | int mds = session->s_mds; | 2213 | int mds = session->s_mds; |
@@ -2216,7 +2217,7 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2216 | u64 size = le64_to_cpu(grant->size); | 2217 | u64 size = le64_to_cpu(grant->size); |
2217 | u64 max_size = le64_to_cpu(grant->max_size); | 2218 | u64 max_size = le64_to_cpu(grant->max_size); |
2218 | struct timespec mtime, atime, ctime; | 2219 | struct timespec mtime, atime, ctime; |
2219 | int reply = 0; | 2220 | int check_caps = 0; |
2220 | int wake = 0; | 2221 | int wake = 0; |
2221 | int writeback = 0; | 2222 | int writeback = 0; |
2222 | int revoked_rdcache = 0; | 2223 | int revoked_rdcache = 0; |
@@ -2329,11 +2330,12 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2329 | if ((used & ~newcaps) & CEPH_CAP_FILE_BUFFER) | 2330 | if ((used & ~newcaps) & CEPH_CAP_FILE_BUFFER) |
2330 | writeback = 1; /* will delay ack */ | 2331 | writeback = 1; /* will delay ack */ |
2331 | else if (dirty & ~newcaps) | 2332 | else if (dirty & ~newcaps) |
2332 | reply = 1; /* initiate writeback in check_caps */ | 2333 | check_caps = 1; /* initiate writeback in check_caps */ |
2333 | else if (((used & ~newcaps) & CEPH_CAP_FILE_CACHE) == 0 || | 2334 | else if (((used & ~newcaps) & CEPH_CAP_FILE_CACHE) == 0 || |
2334 | revoked_rdcache) | 2335 | revoked_rdcache) |
2335 | reply = 2; /* send revoke ack in check_caps */ | 2336 | check_caps = 2; /* send revoke ack in check_caps */ |
2336 | cap->issued = newcaps; | 2337 | cap->issued = newcaps; |
2338 | cap->implemented |= newcaps; | ||
2337 | } else if (cap->issued == newcaps) { | 2339 | } else if (cap->issued == newcaps) { |
2338 | dout("caps unchanged: %s -> %s\n", | 2340 | dout("caps unchanged: %s -> %s\n", |
2339 | ceph_cap_string(cap->issued), ceph_cap_string(newcaps)); | 2341 | ceph_cap_string(cap->issued), ceph_cap_string(newcaps)); |
@@ -2346,6 +2348,7 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2346 | * pending revocation */ | 2348 | * pending revocation */ |
2347 | wake = 1; | 2349 | wake = 1; |
2348 | } | 2350 | } |
2351 | BUG_ON(cap->issued & ~cap->implemented); | ||
2349 | 2352 | ||
2350 | spin_unlock(&inode->i_lock); | 2353 | spin_unlock(&inode->i_lock); |
2351 | if (writeback) | 2354 | if (writeback) |
@@ -2359,7 +2362,14 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2359 | ceph_queue_invalidate(inode); | 2362 | ceph_queue_invalidate(inode); |
2360 | if (wake) | 2363 | if (wake) |
2361 | wake_up(&ci->i_cap_wq); | 2364 | wake_up(&ci->i_cap_wq); |
2362 | return reply; | 2365 | |
2366 | if (check_caps == 1) | ||
2367 | ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY, | ||
2368 | session); | ||
2369 | else if (check_caps == 2) | ||
2370 | ceph_check_caps(ci, CHECK_CAPS_NODELAY, session); | ||
2371 | else | ||
2372 | mutex_unlock(&session->s_mutex); | ||
2363 | } | 2373 | } |
2364 | 2374 | ||
2365 | /* | 2375 | /* |
@@ -2548,9 +2558,8 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, | |||
2548 | ci->i_cap_exporting_issued = cap->issued; | 2558 | ci->i_cap_exporting_issued = cap->issued; |
2549 | } | 2559 | } |
2550 | __ceph_remove_cap(cap); | 2560 | __ceph_remove_cap(cap); |
2551 | } else { | ||
2552 | WARN_ON(!cap); | ||
2553 | } | 2561 | } |
2562 | /* else, we already released it */ | ||
2554 | 2563 | ||
2555 | spin_unlock(&inode->i_lock); | 2564 | spin_unlock(&inode->i_lock); |
2556 | } | 2565 | } |
@@ -2621,9 +2630,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2621 | u64 cap_id; | 2630 | u64 cap_id; |
2622 | u64 size, max_size; | 2631 | u64 size, max_size; |
2623 | u64 tid; | 2632 | u64 tid; |
2624 | int check_caps = 0; | ||
2625 | void *snaptrace; | 2633 | void *snaptrace; |
2626 | int r; | ||
2627 | 2634 | ||
2628 | dout("handle_caps from mds%d\n", mds); | 2635 | dout("handle_caps from mds%d\n", mds); |
2629 | 2636 | ||
@@ -2668,8 +2675,9 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2668 | case CEPH_CAP_OP_IMPORT: | 2675 | case CEPH_CAP_OP_IMPORT: |
2669 | handle_cap_import(mdsc, inode, h, session, | 2676 | handle_cap_import(mdsc, inode, h, session, |
2670 | snaptrace, le32_to_cpu(h->snap_trace_len)); | 2677 | snaptrace, le32_to_cpu(h->snap_trace_len)); |
2671 | check_caps = 1; /* we may have sent a RELEASE to the old auth */ | 2678 | ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, |
2672 | goto done; | 2679 | session); |
2680 | goto done_unlocked; | ||
2673 | } | 2681 | } |
2674 | 2682 | ||
2675 | /* the rest require a cap */ | 2683 | /* the rest require a cap */ |
@@ -2686,16 +2694,8 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2686 | switch (op) { | 2694 | switch (op) { |
2687 | case CEPH_CAP_OP_REVOKE: | 2695 | case CEPH_CAP_OP_REVOKE: |
2688 | case CEPH_CAP_OP_GRANT: | 2696 | case CEPH_CAP_OP_GRANT: |
2689 | r = handle_cap_grant(inode, h, session, cap, msg->middle); | 2697 | handle_cap_grant(inode, h, session, cap, msg->middle); |
2690 | if (r == 1) | 2698 | goto done_unlocked; |
2691 | ceph_check_caps(ceph_inode(inode), | ||
2692 | CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY, | ||
2693 | session); | ||
2694 | else if (r == 2) | ||
2695 | ceph_check_caps(ceph_inode(inode), | ||
2696 | CHECK_CAPS_NODELAY, | ||
2697 | session); | ||
2698 | break; | ||
2699 | 2699 | ||
2700 | case CEPH_CAP_OP_FLUSH_ACK: | 2700 | case CEPH_CAP_OP_FLUSH_ACK: |
2701 | handle_cap_flush_ack(inode, tid, h, session, cap); | 2701 | handle_cap_flush_ack(inode, tid, h, session, cap); |
@@ -2713,9 +2713,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2713 | 2713 | ||
2714 | done: | 2714 | done: |
2715 | mutex_unlock(&session->s_mutex); | 2715 | mutex_unlock(&session->s_mutex); |
2716 | 2716 | done_unlocked: | |
2717 | if (check_caps) | ||
2718 | ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, NULL); | ||
2719 | if (inode) | 2717 | if (inode) |
2720 | iput(inode); | 2718 | iput(inode); |
2721 | return; | 2719 | return; |
@@ -2838,11 +2836,18 @@ int ceph_encode_inode_release(void **p, struct inode *inode, | |||
2838 | struct ceph_cap *cap; | 2836 | struct ceph_cap *cap; |
2839 | struct ceph_mds_request_release *rel = *p; | 2837 | struct ceph_mds_request_release *rel = *p; |
2840 | int ret = 0; | 2838 | int ret = 0; |
2841 | 2839 | int used = 0; | |
2842 | dout("encode_inode_release %p mds%d drop %s unless %s\n", inode, | ||
2843 | mds, ceph_cap_string(drop), ceph_cap_string(unless)); | ||
2844 | 2840 | ||
2845 | spin_lock(&inode->i_lock); | 2841 | spin_lock(&inode->i_lock); |
2842 | used = __ceph_caps_used(ci); | ||
2843 | |||
2844 | dout("encode_inode_release %p mds%d used %s drop %s unless %s\n", inode, | ||
2845 | mds, ceph_cap_string(used), ceph_cap_string(drop), | ||
2846 | ceph_cap_string(unless)); | ||
2847 | |||
2848 | /* only drop unused caps */ | ||
2849 | drop &= ~used; | ||
2850 | |||
2846 | cap = __get_cap_for_mds(ci, mds); | 2851 | cap = __get_cap_for_mds(ci, mds); |
2847 | if (cap && __cap_is_valid(cap)) { | 2852 | if (cap && __cap_is_valid(cap)) { |
2848 | if (force || | 2853 | if (force || |
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 5107384ee029..8a9116e15b70 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -288,8 +288,10 @@ more: | |||
288 | CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR; | 288 | CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR; |
289 | 289 | ||
290 | /* discard old result, if any */ | 290 | /* discard old result, if any */ |
291 | if (fi->last_readdir) | 291 | if (fi->last_readdir) { |
292 | ceph_mdsc_put_request(fi->last_readdir); | 292 | ceph_mdsc_put_request(fi->last_readdir); |
293 | fi->last_readdir = NULL; | ||
294 | } | ||
293 | 295 | ||
294 | /* requery frag tree, as the frag topology may have changed */ | 296 | /* requery frag tree, as the frag topology may have changed */ |
295 | frag = ceph_choose_frag(ceph_inode(inode), frag, NULL, NULL); | 297 | frag = ceph_choose_frag(ceph_inode(inode), frag, NULL, NULL); |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 7abe1aed819b..aca82d55cc53 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -378,6 +378,22 @@ void ceph_destroy_inode(struct inode *inode) | |||
378 | 378 | ||
379 | ceph_queue_caps_release(inode); | 379 | ceph_queue_caps_release(inode); |
380 | 380 | ||
381 | /* | ||
382 | * we may still have a snap_realm reference if there are stray | ||
383 | * caps in i_cap_exporting_issued or i_snap_caps. | ||
384 | */ | ||
385 | if (ci->i_snap_realm) { | ||
386 | struct ceph_mds_client *mdsc = | ||
387 | &ceph_client(ci->vfs_inode.i_sb)->mdsc; | ||
388 | struct ceph_snap_realm *realm = ci->i_snap_realm; | ||
389 | |||
390 | dout(" dropping residual ref to snap realm %p\n", realm); | ||
391 | spin_lock(&realm->inodes_with_caps_lock); | ||
392 | list_del_init(&ci->i_snap_realm_item); | ||
393 | spin_unlock(&realm->inodes_with_caps_lock); | ||
394 | ceph_put_snap_realm(mdsc, realm); | ||
395 | } | ||
396 | |||
381 | kfree(ci->i_symlink); | 397 | kfree(ci->i_symlink); |
382 | while ((n = rb_first(&ci->i_fragtree)) != NULL) { | 398 | while ((n = rb_first(&ci->i_fragtree)) != NULL) { |
383 | frag = rb_entry(n, struct ceph_inode_frag, node); | 399 | frag = rb_entry(n, struct ceph_inode_frag, node); |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a2600101ec22..5c7920be6420 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -328,6 +328,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, | |||
328 | struct ceph_mds_session *s; | 328 | struct ceph_mds_session *s; |
329 | 329 | ||
330 | s = kzalloc(sizeof(*s), GFP_NOFS); | 330 | s = kzalloc(sizeof(*s), GFP_NOFS); |
331 | if (!s) | ||
332 | return ERR_PTR(-ENOMEM); | ||
331 | s->s_mdsc = mdsc; | 333 | s->s_mdsc = mdsc; |
332 | s->s_mds = mds; | 334 | s->s_mds = mds; |
333 | s->s_state = CEPH_MDS_SESSION_NEW; | 335 | s->s_state = CEPH_MDS_SESSION_NEW; |
@@ -529,7 +531,7 @@ static void __unregister_request(struct ceph_mds_client *mdsc, | |||
529 | { | 531 | { |
530 | dout("__unregister_request %p tid %lld\n", req, req->r_tid); | 532 | dout("__unregister_request %p tid %lld\n", req, req->r_tid); |
531 | rb_erase(&req->r_node, &mdsc->request_tree); | 533 | rb_erase(&req->r_node, &mdsc->request_tree); |
532 | ceph_mdsc_put_request(req); | 534 | RB_CLEAR_NODE(&req->r_node); |
533 | 535 | ||
534 | if (req->r_unsafe_dir) { | 536 | if (req->r_unsafe_dir) { |
535 | struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir); | 537 | struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir); |
@@ -538,6 +540,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc, | |||
538 | list_del_init(&req->r_unsafe_dir_item); | 540 | list_del_init(&req->r_unsafe_dir_item); |
539 | spin_unlock(&ci->i_unsafe_lock); | 541 | spin_unlock(&ci->i_unsafe_lock); |
540 | } | 542 | } |
543 | |||
544 | ceph_mdsc_put_request(req); | ||
541 | } | 545 | } |
542 | 546 | ||
543 | /* | 547 | /* |
@@ -862,6 +866,7 @@ static int send_renew_caps(struct ceph_mds_client *mdsc, | |||
862 | if (time_after_eq(jiffies, session->s_cap_ttl) && | 866 | if (time_after_eq(jiffies, session->s_cap_ttl) && |
863 | time_after_eq(session->s_cap_ttl, session->s_renew_requested)) | 867 | time_after_eq(session->s_cap_ttl, session->s_renew_requested)) |
864 | pr_info("mds%d caps stale\n", session->s_mds); | 868 | pr_info("mds%d caps stale\n", session->s_mds); |
869 | session->s_renew_requested = jiffies; | ||
865 | 870 | ||
866 | /* do not try to renew caps until a recovering mds has reconnected | 871 | /* do not try to renew caps until a recovering mds has reconnected |
867 | * with its clients. */ | 872 | * with its clients. */ |
@@ -874,7 +879,6 @@ static int send_renew_caps(struct ceph_mds_client *mdsc, | |||
874 | 879 | ||
875 | dout("send_renew_caps to mds%d (%s)\n", session->s_mds, | 880 | dout("send_renew_caps to mds%d (%s)\n", session->s_mds, |
876 | ceph_mds_state_name(state)); | 881 | ceph_mds_state_name(state)); |
877 | session->s_renew_requested = jiffies; | ||
878 | msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, | 882 | msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, |
879 | ++session->s_renew_seq); | 883 | ++session->s_renew_seq); |
880 | if (IS_ERR(msg)) | 884 | if (IS_ERR(msg)) |
@@ -1566,8 +1570,13 @@ static int __do_request(struct ceph_mds_client *mdsc, | |||
1566 | 1570 | ||
1567 | /* get, open session */ | 1571 | /* get, open session */ |
1568 | session = __ceph_lookup_mds_session(mdsc, mds); | 1572 | session = __ceph_lookup_mds_session(mdsc, mds); |
1569 | if (!session) | 1573 | if (!session) { |
1570 | session = register_session(mdsc, mds); | 1574 | session = register_session(mdsc, mds); |
1575 | if (IS_ERR(session)) { | ||
1576 | err = PTR_ERR(session); | ||
1577 | goto finish; | ||
1578 | } | ||
1579 | } | ||
1571 | dout("do_request mds%d session %p state %s\n", mds, session, | 1580 | dout("do_request mds%d session %p state %s\n", mds, session, |
1572 | session_state_name(session->s_state)); | 1581 | session_state_name(session->s_state)); |
1573 | if (session->s_state != CEPH_MDS_SESSION_OPEN && | 1582 | if (session->s_state != CEPH_MDS_SESSION_OPEN && |
@@ -1770,7 +1779,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
1770 | dout("handle_reply %p\n", req); | 1779 | dout("handle_reply %p\n", req); |
1771 | 1780 | ||
1772 | /* correct session? */ | 1781 | /* correct session? */ |
1773 | if (!req->r_session && req->r_session != session) { | 1782 | if (req->r_session != session) { |
1774 | pr_err("mdsc_handle_reply got %llu on session mds%d" | 1783 | pr_err("mdsc_handle_reply got %llu on session mds%d" |
1775 | " not mds%d\n", tid, session->s_mds, | 1784 | " not mds%d\n", tid, session->s_mds, |
1776 | req->r_session ? req->r_session->s_mds : -1); | 1785 | req->r_session ? req->r_session->s_mds : -1); |
@@ -2682,29 +2691,41 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) | |||
2682 | */ | 2691 | */ |
2683 | static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid) | 2692 | static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid) |
2684 | { | 2693 | { |
2685 | struct ceph_mds_request *req = NULL; | 2694 | struct ceph_mds_request *req = NULL, *nextreq; |
2686 | struct rb_node *n; | 2695 | struct rb_node *n; |
2687 | 2696 | ||
2688 | mutex_lock(&mdsc->mutex); | 2697 | mutex_lock(&mdsc->mutex); |
2689 | dout("wait_unsafe_requests want %lld\n", want_tid); | 2698 | dout("wait_unsafe_requests want %lld\n", want_tid); |
2699 | restart: | ||
2690 | req = __get_oldest_req(mdsc); | 2700 | req = __get_oldest_req(mdsc); |
2691 | while (req && req->r_tid <= want_tid) { | 2701 | while (req && req->r_tid <= want_tid) { |
2702 | /* find next request */ | ||
2703 | n = rb_next(&req->r_node); | ||
2704 | if (n) | ||
2705 | nextreq = rb_entry(n, struct ceph_mds_request, r_node); | ||
2706 | else | ||
2707 | nextreq = NULL; | ||
2692 | if ((req->r_op & CEPH_MDS_OP_WRITE)) { | 2708 | if ((req->r_op & CEPH_MDS_OP_WRITE)) { |
2693 | /* write op */ | 2709 | /* write op */ |
2694 | ceph_mdsc_get_request(req); | 2710 | ceph_mdsc_get_request(req); |
2711 | if (nextreq) | ||
2712 | ceph_mdsc_get_request(nextreq); | ||
2695 | mutex_unlock(&mdsc->mutex); | 2713 | mutex_unlock(&mdsc->mutex); |
2696 | dout("wait_unsafe_requests wait on %llu (want %llu)\n", | 2714 | dout("wait_unsafe_requests wait on %llu (want %llu)\n", |
2697 | req->r_tid, want_tid); | 2715 | req->r_tid, want_tid); |
2698 | wait_for_completion(&req->r_safe_completion); | 2716 | wait_for_completion(&req->r_safe_completion); |
2699 | mutex_lock(&mdsc->mutex); | 2717 | mutex_lock(&mdsc->mutex); |
2700 | n = rb_next(&req->r_node); | ||
2701 | ceph_mdsc_put_request(req); | 2718 | ceph_mdsc_put_request(req); |
2702 | } else { | 2719 | if (!nextreq) |
2703 | n = rb_next(&req->r_node); | 2720 | break; /* next dne before, so we're done! */ |
2721 | if (RB_EMPTY_NODE(&nextreq->r_node)) { | ||
2722 | /* next request was removed from tree */ | ||
2723 | ceph_mdsc_put_request(nextreq); | ||
2724 | goto restart; | ||
2725 | } | ||
2726 | ceph_mdsc_put_request(nextreq); /* won't go away */ | ||
2704 | } | 2727 | } |
2705 | if (!n) | 2728 | req = nextreq; |
2706 | break; | ||
2707 | req = rb_entry(n, struct ceph_mds_request, r_node); | ||
2708 | } | 2729 | } |
2709 | mutex_unlock(&mdsc->mutex); | 2730 | mutex_unlock(&mdsc->mutex); |
2710 | dout("wait_unsafe_requests done\n"); | 2731 | dout("wait_unsafe_requests done\n"); |
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 781656a49bf8..a32f0f896d9f 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c | |||
@@ -366,6 +366,14 @@ void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr) | |||
366 | } | 366 | } |
367 | 367 | ||
368 | /* | 368 | /* |
369 | * return true if this connection ever successfully opened | ||
370 | */ | ||
371 | bool ceph_con_opened(struct ceph_connection *con) | ||
372 | { | ||
373 | return con->connect_seq > 0; | ||
374 | } | ||
375 | |||
376 | /* | ||
369 | * generic get/put | 377 | * generic get/put |
370 | */ | 378 | */ |
371 | struct ceph_connection *ceph_con_get(struct ceph_connection *con) | 379 | struct ceph_connection *ceph_con_get(struct ceph_connection *con) |
@@ -830,13 +838,6 @@ static void prepare_read_connect(struct ceph_connection *con) | |||
830 | con->in_base_pos = 0; | 838 | con->in_base_pos = 0; |
831 | } | 839 | } |
832 | 840 | ||
833 | static void prepare_read_connect_retry(struct ceph_connection *con) | ||
834 | { | ||
835 | dout("prepare_read_connect_retry %p\n", con); | ||
836 | con->in_base_pos = strlen(CEPH_BANNER) + sizeof(con->actual_peer_addr) | ||
837 | + sizeof(con->peer_addr_for_me); | ||
838 | } | ||
839 | |||
840 | static void prepare_read_ack(struct ceph_connection *con) | 841 | static void prepare_read_ack(struct ceph_connection *con) |
841 | { | 842 | { |
842 | dout("prepare_read_ack %p\n", con); | 843 | dout("prepare_read_ack %p\n", con); |
@@ -1146,7 +1147,7 @@ static int process_connect(struct ceph_connection *con) | |||
1146 | } | 1147 | } |
1147 | con->auth_retry = 1; | 1148 | con->auth_retry = 1; |
1148 | prepare_write_connect(con->msgr, con, 0); | 1149 | prepare_write_connect(con->msgr, con, 0); |
1149 | prepare_read_connect_retry(con); | 1150 | prepare_read_connect(con); |
1150 | break; | 1151 | break; |
1151 | 1152 | ||
1152 | case CEPH_MSGR_TAG_RESETSESSION: | 1153 | case CEPH_MSGR_TAG_RESETSESSION: |
@@ -1843,8 +1844,6 @@ static void ceph_fault(struct ceph_connection *con) | |||
1843 | goto out; | 1844 | goto out; |
1844 | } | 1845 | } |
1845 | 1846 | ||
1846 | clear_bit(BUSY, &con->state); /* to avoid an improbable race */ | ||
1847 | |||
1848 | mutex_lock(&con->mutex); | 1847 | mutex_lock(&con->mutex); |
1849 | if (test_bit(CLOSED, &con->state)) | 1848 | if (test_bit(CLOSED, &con->state)) |
1850 | goto out_unlock; | 1849 | goto out_unlock; |
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h index 4caaa5911110..a343dae73cdc 100644 --- a/fs/ceph/messenger.h +++ b/fs/ceph/messenger.h | |||
@@ -223,6 +223,7 @@ extern void ceph_con_init(struct ceph_messenger *msgr, | |||
223 | struct ceph_connection *con); | 223 | struct ceph_connection *con); |
224 | extern void ceph_con_open(struct ceph_connection *con, | 224 | extern void ceph_con_open(struct ceph_connection *con, |
225 | struct ceph_entity_addr *addr); | 225 | struct ceph_entity_addr *addr); |
226 | extern bool ceph_con_opened(struct ceph_connection *con); | ||
226 | extern void ceph_con_close(struct ceph_connection *con); | 227 | extern void ceph_con_close(struct ceph_connection *con); |
227 | extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg); | 228 | extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg); |
228 | extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg); | 229 | extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg); |
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index dbe63db9762f..c7b4dedaace6 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c | |||
@@ -413,11 +413,22 @@ static void remove_old_osds(struct ceph_osd_client *osdc, int remove_all) | |||
413 | */ | 413 | */ |
414 | static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) | 414 | static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) |
415 | { | 415 | { |
416 | struct ceph_osd_request *req; | ||
416 | int ret = 0; | 417 | int ret = 0; |
417 | 418 | ||
418 | dout("__reset_osd %p osd%d\n", osd, osd->o_osd); | 419 | dout("__reset_osd %p osd%d\n", osd, osd->o_osd); |
419 | if (list_empty(&osd->o_requests)) { | 420 | if (list_empty(&osd->o_requests)) { |
420 | __remove_osd(osdc, osd); | 421 | __remove_osd(osdc, osd); |
422 | } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd], | ||
423 | &osd->o_con.peer_addr, | ||
424 | sizeof(osd->o_con.peer_addr)) == 0 && | ||
425 | !ceph_con_opened(&osd->o_con)) { | ||
426 | dout(" osd addr hasn't changed and connection never opened," | ||
427 | " letting msgr retry"); | ||
428 | /* touch each r_stamp for handle_timeout()'s benfit */ | ||
429 | list_for_each_entry(req, &osd->o_requests, r_osd_item) | ||
430 | req->r_stamp = jiffies; | ||
431 | ret = -EAGAIN; | ||
421 | } else { | 432 | } else { |
422 | ceph_con_close(&osd->o_con); | 433 | ceph_con_close(&osd->o_con); |
423 | ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]); | 434 | ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]); |
@@ -633,7 +644,7 @@ static int __send_request(struct ceph_osd_client *osdc, | |||
633 | reqhead->flags |= cpu_to_le32(req->r_flags); /* e.g., RETRY */ | 644 | reqhead->flags |= cpu_to_le32(req->r_flags); /* e.g., RETRY */ |
634 | reqhead->reassert_version = req->r_reassert_version; | 645 | reqhead->reassert_version = req->r_reassert_version; |
635 | 646 | ||
636 | req->r_sent_stamp = jiffies; | 647 | req->r_stamp = jiffies; |
637 | list_move_tail(&osdc->req_lru, &req->r_req_lru_item); | 648 | list_move_tail(&osdc->req_lru, &req->r_req_lru_item); |
638 | 649 | ||
639 | ceph_msg_get(req->r_request); /* send consumes a ref */ | 650 | ceph_msg_get(req->r_request); /* send consumes a ref */ |
@@ -660,7 +671,7 @@ static void handle_timeout(struct work_struct *work) | |||
660 | unsigned long timeout = osdc->client->mount_args->osd_timeout * HZ; | 671 | unsigned long timeout = osdc->client->mount_args->osd_timeout * HZ; |
661 | unsigned long keepalive = | 672 | unsigned long keepalive = |
662 | osdc->client->mount_args->osd_keepalive_timeout * HZ; | 673 | osdc->client->mount_args->osd_keepalive_timeout * HZ; |
663 | unsigned long last_sent = 0; | 674 | unsigned long last_stamp = 0; |
664 | struct rb_node *p; | 675 | struct rb_node *p; |
665 | struct list_head slow_osds; | 676 | struct list_head slow_osds; |
666 | 677 | ||
@@ -697,12 +708,12 @@ static void handle_timeout(struct work_struct *work) | |||
697 | req = list_entry(osdc->req_lru.next, struct ceph_osd_request, | 708 | req = list_entry(osdc->req_lru.next, struct ceph_osd_request, |
698 | r_req_lru_item); | 709 | r_req_lru_item); |
699 | 710 | ||
700 | if (time_before(jiffies, req->r_sent_stamp + timeout)) | 711 | if (time_before(jiffies, req->r_stamp + timeout)) |
701 | break; | 712 | break; |
702 | 713 | ||
703 | BUG_ON(req == last_req && req->r_sent_stamp == last_sent); | 714 | BUG_ON(req == last_req && req->r_stamp == last_stamp); |
704 | last_req = req; | 715 | last_req = req; |
705 | last_sent = req->r_sent_stamp; | 716 | last_stamp = req->r_stamp; |
706 | 717 | ||
707 | osd = req->r_osd; | 718 | osd = req->r_osd; |
708 | BUG_ON(!osd); | 719 | BUG_ON(!osd); |
@@ -718,7 +729,7 @@ static void handle_timeout(struct work_struct *work) | |||
718 | */ | 729 | */ |
719 | INIT_LIST_HEAD(&slow_osds); | 730 | INIT_LIST_HEAD(&slow_osds); |
720 | list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) { | 731 | list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) { |
721 | if (time_before(jiffies, req->r_sent_stamp + keepalive)) | 732 | if (time_before(jiffies, req->r_stamp + keepalive)) |
722 | break; | 733 | break; |
723 | 734 | ||
724 | osd = req->r_osd; | 735 | osd = req->r_osd; |
@@ -862,7 +873,9 @@ static int __kick_requests(struct ceph_osd_client *osdc, | |||
862 | 873 | ||
863 | dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1); | 874 | dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1); |
864 | if (kickosd) { | 875 | if (kickosd) { |
865 | __reset_osd(osdc, kickosd); | 876 | err = __reset_osd(osdc, kickosd); |
877 | if (err == -EAGAIN) | ||
878 | return 1; | ||
866 | } else { | 879 | } else { |
867 | for (p = rb_first(&osdc->osds); p; p = n) { | 880 | for (p = rb_first(&osdc->osds); p; p = n) { |
868 | struct ceph_osd *osd = | 881 | struct ceph_osd *osd = |
@@ -913,7 +926,7 @@ static int __kick_requests(struct ceph_osd_client *osdc, | |||
913 | 926 | ||
914 | kick: | 927 | kick: |
915 | dout("kicking %p tid %llu osd%d\n", req, req->r_tid, | 928 | dout("kicking %p tid %llu osd%d\n", req, req->r_tid, |
916 | req->r_osd->o_osd); | 929 | req->r_osd ? req->r_osd->o_osd : -1); |
917 | req->r_flags |= CEPH_OSD_FLAG_RETRY; | 930 | req->r_flags |= CEPH_OSD_FLAG_RETRY; |
918 | err = __send_request(osdc, req); | 931 | err = __send_request(osdc, req); |
919 | if (err) { | 932 | if (err) { |
diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h index 1b1a3ca43afc..b0759911e7c3 100644 --- a/fs/ceph/osd_client.h +++ b/fs/ceph/osd_client.h | |||
@@ -70,7 +70,7 @@ struct ceph_osd_request { | |||
70 | 70 | ||
71 | char r_oid[40]; /* object name */ | 71 | char r_oid[40]; /* object name */ |
72 | int r_oid_len; | 72 | int r_oid_len; |
73 | unsigned long r_sent_stamp; | 73 | unsigned long r_stamp; /* send OR check time */ |
74 | bool r_resend; /* msg send failed, needs retry */ | 74 | bool r_resend; /* msg send failed, needs retry */ |
75 | 75 | ||
76 | struct ceph_file_layout r_file_layout; | 76 | struct ceph_file_layout r_file_layout; |
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index b83f2692b835..d82fe87c2a6e 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c | |||
@@ -480,6 +480,14 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) | |||
480 | return NULL; | 480 | return NULL; |
481 | } | 481 | } |
482 | 482 | ||
483 | void __decode_pool(void **p, struct ceph_pg_pool_info *pi) | ||
484 | { | ||
485 | ceph_decode_copy(p, &pi->v, sizeof(pi->v)); | ||
486 | calc_pg_masks(pi); | ||
487 | *p += le32_to_cpu(pi->v.num_snaps) * sizeof(u64); | ||
488 | *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; | ||
489 | } | ||
490 | |||
483 | /* | 491 | /* |
484 | * decode a full map. | 492 | * decode a full map. |
485 | */ | 493 | */ |
@@ -526,12 +534,8 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) | |||
526 | ev, CEPH_PG_POOL_VERSION); | 534 | ev, CEPH_PG_POOL_VERSION); |
527 | goto bad; | 535 | goto bad; |
528 | } | 536 | } |
529 | ceph_decode_copy(p, &pi->v, sizeof(pi->v)); | 537 | __decode_pool(p, pi); |
530 | __insert_pg_pool(&map->pg_pools, pi); | 538 | __insert_pg_pool(&map->pg_pools, pi); |
531 | calc_pg_masks(pi); | ||
532 | *p += le32_to_cpu(pi->v.num_snaps) * sizeof(u64); | ||
533 | *p += le32_to_cpu(pi->v.num_removed_snap_intervals) | ||
534 | * sizeof(u64) * 2; | ||
535 | } | 539 | } |
536 | ceph_decode_32_safe(p, end, map->pool_max, bad); | 540 | ceph_decode_32_safe(p, end, map->pool_max, bad); |
537 | 541 | ||
@@ -714,8 +718,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
714 | pi->id = pool; | 718 | pi->id = pool; |
715 | __insert_pg_pool(&map->pg_pools, pi); | 719 | __insert_pg_pool(&map->pg_pools, pi); |
716 | } | 720 | } |
717 | ceph_decode_copy(p, &pi->v, sizeof(pi->v)); | 721 | __decode_pool(p, pi); |
718 | calc_pg_masks(pi); | ||
719 | } | 722 | } |
720 | 723 | ||
721 | /* old_pool */ | 724 | /* old_pool */ |
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index bf2a5f3846a4..df04e210a055 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c | |||
@@ -314,9 +314,9 @@ static int build_snap_context(struct ceph_snap_realm *realm) | |||
314 | because we rebuild_snap_realms() works _downward_ in | 314 | because we rebuild_snap_realms() works _downward_ in |
315 | hierarchy after each update.) */ | 315 | hierarchy after each update.) */ |
316 | if (realm->cached_context && | 316 | if (realm->cached_context && |
317 | realm->cached_context->seq <= realm->seq && | 317 | realm->cached_context->seq == realm->seq && |
318 | (!parent || | 318 | (!parent || |
319 | realm->cached_context->seq <= parent->cached_context->seq)) { | 319 | realm->cached_context->seq >= parent->cached_context->seq)) { |
320 | dout("build_snap_context %llx %p: %p seq %lld (%d snaps)" | 320 | dout("build_snap_context %llx %p: %p seq %lld (%d snaps)" |
321 | " (unchanged)\n", | 321 | " (unchanged)\n", |
322 | realm->ino, realm, realm->cached_context, | 322 | realm->ino, realm, realm->cached_context, |
@@ -818,7 +818,9 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, | |||
818 | * queued (again) by ceph_update_snap_trace() | 818 | * queued (again) by ceph_update_snap_trace() |
819 | * below. Queue it _now_, under the old context. | 819 | * below. Queue it _now_, under the old context. |
820 | */ | 820 | */ |
821 | spin_lock(&realm->inodes_with_caps_lock); | ||
821 | list_del_init(&ci->i_snap_realm_item); | 822 | list_del_init(&ci->i_snap_realm_item); |
823 | spin_unlock(&realm->inodes_with_caps_lock); | ||
822 | spin_unlock(&inode->i_lock); | 824 | spin_unlock(&inode->i_lock); |
823 | 825 | ||
824 | ceph_queue_cap_snap(ci, | 826 | ceph_queue_cap_snap(ci, |
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index ef9008b885b5..0d0e97ed3ff6 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c | |||
@@ -582,7 +582,9 @@ got: | |||
582 | inode->i_generation = sbi->s_next_generation++; | 582 | inode->i_generation = sbi->s_next_generation++; |
583 | spin_unlock(&sbi->s_next_gen_lock); | 583 | spin_unlock(&sbi->s_next_gen_lock); |
584 | 584 | ||
585 | ei->i_state = EXT3_STATE_NEW; | 585 | ei->i_state_flags = 0; |
586 | ext3_set_inode_state(inode, EXT3_STATE_NEW); | ||
587 | |||
586 | ei->i_extra_isize = | 588 | ei->i_extra_isize = |
587 | (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ? | 589 | (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ? |
588 | sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0; | 590 | sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0; |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 7f920b7263a4..ea33bdf0a300 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -2811,7 +2811,7 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino) | |||
2811 | inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime); | 2811 | inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime); |
2812 | inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0; | 2812 | inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0; |
2813 | 2813 | ||
2814 | ei->i_state = 0; | 2814 | ei->i_state_flags = 0; |
2815 | ei->i_dir_start_lookup = 0; | 2815 | ei->i_dir_start_lookup = 0; |
2816 | ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); | 2816 | ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); |
2817 | /* We now have enough fields to check if the inode was active or not. | 2817 | /* We now have enough fields to check if the inode was active or not. |
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index c1ef50154868..6fcc7e71fbaa 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c | |||
@@ -309,7 +309,7 @@ static int vfat_create_shortname(struct inode *dir, struct nls_table *nls, | |||
309 | { | 309 | { |
310 | struct fat_mount_options *opts = &MSDOS_SB(dir->i_sb)->options; | 310 | struct fat_mount_options *opts = &MSDOS_SB(dir->i_sb)->options; |
311 | wchar_t *ip, *ext_start, *end, *name_start; | 311 | wchar_t *ip, *ext_start, *end, *name_start; |
312 | unsigned char base[9], ext[4], buf[8], *p; | 312 | unsigned char base[9], ext[4], buf[5], *p; |
313 | unsigned char charbuf[NLS_MAX_CHARSET_SIZE]; | 313 | unsigned char charbuf[NLS_MAX_CHARSET_SIZE]; |
314 | int chl, chi; | 314 | int chl, chi; |
315 | int sz = 0, extlen, baselen, i, numtail_baselen, numtail2_baselen; | 315 | int sz = 0, extlen, baselen, i, numtail_baselen, numtail2_baselen; |
@@ -467,7 +467,7 @@ static int vfat_create_shortname(struct inode *dir, struct nls_table *nls, | |||
467 | return 0; | 467 | return 0; |
468 | } | 468 | } |
469 | 469 | ||
470 | i = jiffies & 0xffff; | 470 | i = jiffies; |
471 | sz = (jiffies >> 16) & 0x7; | 471 | sz = (jiffies >> 16) & 0x7; |
472 | if (baselen > 2) { | 472 | if (baselen > 2) { |
473 | baselen = numtail2_baselen; | 473 | baselen = numtail2_baselen; |
@@ -476,7 +476,7 @@ static int vfat_create_shortname(struct inode *dir, struct nls_table *nls, | |||
476 | name_res[baselen + 4] = '~'; | 476 | name_res[baselen + 4] = '~'; |
477 | name_res[baselen + 5] = '1' + sz; | 477 | name_res[baselen + 5] = '1' + sz; |
478 | while (1) { | 478 | while (1) { |
479 | sprintf(buf, "%04X", i); | 479 | snprintf(buf, sizeof(buf), "%04X", i & 0xffff); |
480 | memcpy(&name_res[baselen], buf, 4); | 480 | memcpy(&name_res[baselen], buf, 4); |
481 | if (vfat_find_form(dir, name_res) < 0) | 481 | if (vfat_find_form(dir, name_res) < 0) |
482 | break; | 482 | break; |
diff --git a/fs/fscache/object.c b/fs/fscache/object.c index e513ac599c8e..0b589a9b4ffc 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c | |||
@@ -53,7 +53,7 @@ const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = { | |||
53 | static void fscache_object_slow_work_put_ref(struct slow_work *); | 53 | static void fscache_object_slow_work_put_ref(struct slow_work *); |
54 | static int fscache_object_slow_work_get_ref(struct slow_work *); | 54 | static int fscache_object_slow_work_get_ref(struct slow_work *); |
55 | static void fscache_object_slow_work_execute(struct slow_work *); | 55 | static void fscache_object_slow_work_execute(struct slow_work *); |
56 | #ifdef CONFIG_SLOW_WORK_PROC | 56 | #ifdef CONFIG_SLOW_WORK_DEBUG |
57 | static void fscache_object_slow_work_desc(struct slow_work *, struct seq_file *); | 57 | static void fscache_object_slow_work_desc(struct slow_work *, struct seq_file *); |
58 | #endif | 58 | #endif |
59 | static void fscache_initialise_object(struct fscache_object *); | 59 | static void fscache_initialise_object(struct fscache_object *); |
@@ -69,7 +69,7 @@ const struct slow_work_ops fscache_object_slow_work_ops = { | |||
69 | .get_ref = fscache_object_slow_work_get_ref, | 69 | .get_ref = fscache_object_slow_work_get_ref, |
70 | .put_ref = fscache_object_slow_work_put_ref, | 70 | .put_ref = fscache_object_slow_work_put_ref, |
71 | .execute = fscache_object_slow_work_execute, | 71 | .execute = fscache_object_slow_work_execute, |
72 | #ifdef CONFIG_SLOW_WORK_PROC | 72 | #ifdef CONFIG_SLOW_WORK_DEBUG |
73 | .desc = fscache_object_slow_work_desc, | 73 | .desc = fscache_object_slow_work_desc, |
74 | #endif | 74 | #endif |
75 | }; | 75 | }; |
@@ -364,7 +364,7 @@ static void fscache_object_slow_work_execute(struct slow_work *work) | |||
364 | /* | 364 | /* |
365 | * describe an object for slow-work debugging | 365 | * describe an object for slow-work debugging |
366 | */ | 366 | */ |
367 | #ifdef CONFIG_SLOW_WORK_PROC | 367 | #ifdef CONFIG_SLOW_WORK_DEBUG |
368 | static void fscache_object_slow_work_desc(struct slow_work *work, | 368 | static void fscache_object_slow_work_desc(struct slow_work *work, |
369 | struct seq_file *m) | 369 | struct seq_file *m) |
370 | { | 370 | { |
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 313e79a14266..9f6c928d4586 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c | |||
@@ -500,7 +500,7 @@ static void fscache_op_execute(struct slow_work *work) | |||
500 | /* | 500 | /* |
501 | * describe an operation for slow-work debugging | 501 | * describe an operation for slow-work debugging |
502 | */ | 502 | */ |
503 | #ifdef CONFIG_SLOW_WORK_PROC | 503 | #ifdef CONFIG_SLOW_WORK_DEBUG |
504 | static void fscache_op_desc(struct slow_work *work, struct seq_file *m) | 504 | static void fscache_op_desc(struct slow_work *work, struct seq_file *m) |
505 | { | 505 | { |
506 | struct fscache_operation *op = | 506 | struct fscache_operation *op = |
@@ -517,7 +517,7 @@ const struct slow_work_ops fscache_op_slow_work_ops = { | |||
517 | .get_ref = fscache_op_get_ref, | 517 | .get_ref = fscache_op_get_ref, |
518 | .put_ref = fscache_op_put_ref, | 518 | .put_ref = fscache_op_put_ref, |
519 | .execute = fscache_op_execute, | 519 | .execute = fscache_op_execute, |
520 | #ifdef CONFIG_SLOW_WORK_PROC | 520 | #ifdef CONFIG_SLOW_WORK_DEBUG |
521 | .desc = fscache_op_desc, | 521 | .desc = fscache_op_desc, |
522 | #endif | 522 | #endif |
523 | }; | 523 | }; |
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index 9718c22f186d..a5d0c56d3ebc 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c | |||
@@ -80,6 +80,7 @@ static void writeseg_end_io(struct bio *bio, int err) | |||
80 | prefetchw(&bvec->bv_page->flags); | 80 | prefetchw(&bvec->bv_page->flags); |
81 | 81 | ||
82 | end_page_writeback(page); | 82 | end_page_writeback(page); |
83 | page_cache_release(page); | ||
83 | } while (bvec >= bio->bi_io_vec); | 84 | } while (bvec >= bio->bi_io_vec); |
84 | bio_put(bio); | 85 | bio_put(bio); |
85 | if (atomic_dec_and_test(&super->s_pending_writes)) | 86 | if (atomic_dec_and_test(&super->s_pending_writes)) |
@@ -97,8 +98,10 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, | |||
97 | unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9); | 98 | unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9); |
98 | int i; | 99 | int i; |
99 | 100 | ||
101 | if (max_pages > BIO_MAX_PAGES) | ||
102 | max_pages = BIO_MAX_PAGES; | ||
100 | bio = bio_alloc(GFP_NOFS, max_pages); | 103 | bio = bio_alloc(GFP_NOFS, max_pages); |
101 | BUG_ON(!bio); /* FIXME: handle this */ | 104 | BUG_ON(!bio); |
102 | 105 | ||
103 | for (i = 0; i < nr_pages; i++) { | 106 | for (i = 0; i < nr_pages; i++) { |
104 | if (i >= max_pages) { | 107 | if (i >= max_pages) { |
@@ -191,8 +194,10 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index, | |||
191 | unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9); | 194 | unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9); |
192 | int i; | 195 | int i; |
193 | 196 | ||
197 | if (max_pages > BIO_MAX_PAGES) | ||
198 | max_pages = BIO_MAX_PAGES; | ||
194 | bio = bio_alloc(GFP_NOFS, max_pages); | 199 | bio = bio_alloc(GFP_NOFS, max_pages); |
195 | BUG_ON(!bio); /* FIXME: handle this */ | 200 | BUG_ON(!bio); |
196 | 201 | ||
197 | for (i = 0; i < nr_pages; i++) { | 202 | for (i = 0; i < nr_pages; i++) { |
198 | if (i >= max_pages) { | 203 | if (i >= max_pages) { |
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 56a8bfbb0120..c76b4b5c7ff6 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c | |||
@@ -303,12 +303,12 @@ static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir) | |||
303 | (filler_t *)logfs_readpage, NULL); | 303 | (filler_t *)logfs_readpage, NULL); |
304 | if (IS_ERR(page)) | 304 | if (IS_ERR(page)) |
305 | return PTR_ERR(page); | 305 | return PTR_ERR(page); |
306 | dd = kmap_atomic(page, KM_USER0); | 306 | dd = kmap(page); |
307 | BUG_ON(dd->namelen == 0); | 307 | BUG_ON(dd->namelen == 0); |
308 | 308 | ||
309 | full = filldir(buf, (char *)dd->name, be16_to_cpu(dd->namelen), | 309 | full = filldir(buf, (char *)dd->name, be16_to_cpu(dd->namelen), |
310 | pos, be64_to_cpu(dd->ino), dd->type); | 310 | pos, be64_to_cpu(dd->ino), dd->type); |
311 | kunmap_atomic(dd, KM_USER0); | 311 | kunmap(page); |
312 | page_cache_release(page); | 312 | page_cache_release(page); |
313 | if (full) | 313 | if (full) |
314 | break; | 314 | break; |
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c index 6ad30a4c9052..d57c7b07b60b 100644 --- a/fs/logfs/journal.c +++ b/fs/logfs/journal.c | |||
@@ -800,6 +800,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb) | |||
800 | { | 800 | { |
801 | struct logfs_super *super = logfs_super(sb); | 801 | struct logfs_super *super = logfs_super(sb); |
802 | struct logfs_area *area = super->s_journal_area; | 802 | struct logfs_area *area = super->s_journal_area; |
803 | struct btree_head32 *head = &super->s_reserved_segments; | ||
803 | u32 segno, ec; | 804 | u32 segno, ec; |
804 | int i, err; | 805 | int i, err; |
805 | 806 | ||
@@ -807,6 +808,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb) | |||
807 | /* Drop old segments */ | 808 | /* Drop old segments */ |
808 | journal_for_each(i) | 809 | journal_for_each(i) |
809 | if (super->s_journal_seg[i]) { | 810 | if (super->s_journal_seg[i]) { |
811 | btree_remove32(head, super->s_journal_seg[i]); | ||
810 | logfs_set_segment_unreserved(sb, | 812 | logfs_set_segment_unreserved(sb, |
811 | super->s_journal_seg[i], | 813 | super->s_journal_seg[i], |
812 | super->s_journal_ec[i]); | 814 | super->s_journal_ec[i]); |
@@ -819,8 +821,13 @@ void do_logfs_journal_wl_pass(struct super_block *sb) | |||
819 | super->s_journal_seg[i] = segno; | 821 | super->s_journal_seg[i] = segno; |
820 | super->s_journal_ec[i] = ec; | 822 | super->s_journal_ec[i] = ec; |
821 | logfs_set_segment_reserved(sb, segno); | 823 | logfs_set_segment_reserved(sb, segno); |
824 | err = btree_insert32(head, segno, (void *)1, GFP_KERNEL); | ||
825 | BUG_ON(err); /* mempool should prevent this */ | ||
826 | err = logfs_erase_segment(sb, segno, 1); | ||
827 | BUG_ON(err); /* FIXME: remount-ro would be nicer */ | ||
822 | } | 828 | } |
823 | /* Manually move journal_area */ | 829 | /* Manually move journal_area */ |
830 | freeseg(sb, area->a_segno); | ||
824 | area->a_segno = super->s_journal_seg[0]; | 831 | area->a_segno = super->s_journal_seg[0]; |
825 | area->a_is_open = 0; | 832 | area->a_is_open = 0; |
826 | area->a_used_bytes = 0; | 833 | area->a_used_bytes = 0; |
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h index 129779431373..b84b0eec6024 100644 --- a/fs/logfs/logfs.h +++ b/fs/logfs/logfs.h | |||
@@ -587,6 +587,7 @@ void move_page_to_btree(struct page *page); | |||
587 | int logfs_init_mapping(struct super_block *sb); | 587 | int logfs_init_mapping(struct super_block *sb); |
588 | void logfs_sync_area(struct logfs_area *area); | 588 | void logfs_sync_area(struct logfs_area *area); |
589 | void logfs_sync_segments(struct super_block *sb); | 589 | void logfs_sync_segments(struct super_block *sb); |
590 | void freeseg(struct super_block *sb, u32 segno); | ||
590 | 591 | ||
591 | /* area handling */ | 592 | /* area handling */ |
592 | int logfs_init_areas(struct super_block *sb); | 593 | int logfs_init_areas(struct super_block *sb); |
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index 7a23b3e7c0a7..c3a3a6814b84 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c | |||
@@ -1594,7 +1594,6 @@ int logfs_delete(struct inode *inode, pgoff_t index, | |||
1594 | return ret; | 1594 | return ret; |
1595 | } | 1595 | } |
1596 | 1596 | ||
1597 | /* Rewrite cannot mark the inode dirty but has to write it immediatly. */ | ||
1598 | int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs, | 1597 | int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs, |
1599 | gc_level_t gc_level, long flags) | 1598 | gc_level_t gc_level, long flags) |
1600 | { | 1599 | { |
@@ -1611,6 +1610,18 @@ int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs, | |||
1611 | if (level != 0) | 1610 | if (level != 0) |
1612 | alloc_indirect_block(inode, page, 0); | 1611 | alloc_indirect_block(inode, page, 0); |
1613 | err = logfs_write_buf(inode, page, flags); | 1612 | err = logfs_write_buf(inode, page, flags); |
1613 | if (!err && shrink_level(gc_level) == 0) { | ||
1614 | /* Rewrite cannot mark the inode dirty but has to | ||
1615 | * write it immediatly. | ||
1616 | * Q: Can't we just create an alias for the inode | ||
1617 | * instead? And if not, why not? | ||
1618 | */ | ||
1619 | if (inode->i_ino == LOGFS_INO_MASTER) | ||
1620 | logfs_write_anchor(inode->i_sb); | ||
1621 | else { | ||
1622 | err = __logfs_write_inode(inode, flags); | ||
1623 | } | ||
1624 | } | ||
1614 | } | 1625 | } |
1615 | logfs_put_write_page(page); | 1626 | logfs_put_write_page(page); |
1616 | return err; | 1627 | return err; |
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c index 1a14f9910d55..0ecd8f07c11e 100644 --- a/fs/logfs/segment.c +++ b/fs/logfs/segment.c | |||
@@ -93,50 +93,58 @@ void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len, | |||
93 | } while (len); | 93 | } while (len); |
94 | } | 94 | } |
95 | 95 | ||
96 | /* | 96 | static void pad_partial_page(struct logfs_area *area) |
97 | * bdev_writeseg will write full pages. Memset the tail to prevent data leaks. | ||
98 | */ | ||
99 | static void pad_wbuf(struct logfs_area *area, int final) | ||
100 | { | 97 | { |
101 | struct super_block *sb = area->a_sb; | 98 | struct super_block *sb = area->a_sb; |
102 | struct logfs_super *super = logfs_super(sb); | ||
103 | struct page *page; | 99 | struct page *page; |
104 | u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes); | 100 | u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes); |
105 | pgoff_t index = ofs >> PAGE_SHIFT; | 101 | pgoff_t index = ofs >> PAGE_SHIFT; |
106 | long offset = ofs & (PAGE_SIZE-1); | 102 | long offset = ofs & (PAGE_SIZE-1); |
107 | u32 len = PAGE_SIZE - offset; | 103 | u32 len = PAGE_SIZE - offset; |
108 | 104 | ||
109 | if (len == PAGE_SIZE) { | 105 | if (len % PAGE_SIZE) { |
110 | /* The math in this function can surely use some love */ | 106 | page = get_mapping_page(sb, index, 0); |
111 | len = 0; | ||
112 | } | ||
113 | if (len) { | ||
114 | BUG_ON(area->a_used_bytes >= super->s_segsize); | ||
115 | |||
116 | page = get_mapping_page(area->a_sb, index, 0); | ||
117 | BUG_ON(!page); /* FIXME: reserve a pool */ | 107 | BUG_ON(!page); /* FIXME: reserve a pool */ |
118 | memset(page_address(page) + offset, 0xff, len); | 108 | memset(page_address(page) + offset, 0xff, len); |
119 | SetPagePrivate(page); | 109 | SetPagePrivate(page); |
120 | page_cache_release(page); | 110 | page_cache_release(page); |
121 | } | 111 | } |
112 | } | ||
122 | 113 | ||
123 | if (!final) | 114 | static void pad_full_pages(struct logfs_area *area) |
124 | return; | 115 | { |
116 | struct super_block *sb = area->a_sb; | ||
117 | struct logfs_super *super = logfs_super(sb); | ||
118 | u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes); | ||
119 | u32 len = super->s_segsize - area->a_used_bytes; | ||
120 | pgoff_t index = PAGE_CACHE_ALIGN(ofs) >> PAGE_CACHE_SHIFT; | ||
121 | pgoff_t no_indizes = len >> PAGE_CACHE_SHIFT; | ||
122 | struct page *page; | ||
125 | 123 | ||
126 | area->a_used_bytes += len; | 124 | while (no_indizes) { |
127 | for ( ; area->a_used_bytes < super->s_segsize; | 125 | page = get_mapping_page(sb, index, 0); |
128 | area->a_used_bytes += PAGE_SIZE) { | ||
129 | /* Memset another page */ | ||
130 | index++; | ||
131 | page = get_mapping_page(area->a_sb, index, 0); | ||
132 | BUG_ON(!page); /* FIXME: reserve a pool */ | 126 | BUG_ON(!page); /* FIXME: reserve a pool */ |
133 | memset(page_address(page), 0xff, PAGE_SIZE); | 127 | SetPageUptodate(page); |
128 | memset(page_address(page), 0xff, PAGE_CACHE_SIZE); | ||
134 | SetPagePrivate(page); | 129 | SetPagePrivate(page); |
135 | page_cache_release(page); | 130 | page_cache_release(page); |
131 | index++; | ||
132 | no_indizes--; | ||
136 | } | 133 | } |
137 | } | 134 | } |
138 | 135 | ||
139 | /* | 136 | /* |
137 | * bdev_writeseg will write full pages. Memset the tail to prevent data leaks. | ||
138 | * Also make sure we allocate (and memset) all pages for final writeout. | ||
139 | */ | ||
140 | static void pad_wbuf(struct logfs_area *area, int final) | ||
141 | { | ||
142 | pad_partial_page(area); | ||
143 | if (final) | ||
144 | pad_full_pages(area); | ||
145 | } | ||
146 | |||
147 | /* | ||
140 | * We have to be careful with the alias tree. Since lookup is done by bix, | 148 | * We have to be careful with the alias tree. Since lookup is done by bix, |
141 | * it needs to be normalized, so 14, 15, 16, etc. all match when dealing with | 149 | * it needs to be normalized, so 14, 15, 16, etc. all match when dealing with |
142 | * indirect blocks. So always use it through accessor functions. | 150 | * indirect blocks. So always use it through accessor functions. |
@@ -683,7 +691,7 @@ int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow) | |||
683 | return 0; | 691 | return 0; |
684 | } | 692 | } |
685 | 693 | ||
686 | static void freeseg(struct super_block *sb, u32 segno) | 694 | void freeseg(struct super_block *sb, u32 segno) |
687 | { | 695 | { |
688 | struct logfs_super *super = logfs_super(sb); | 696 | struct logfs_super *super = logfs_super(sb); |
689 | struct address_space *mapping = super->s_mapping_inode->i_mapping; | 697 | struct address_space *mapping = super->s_mapping_inode->i_mapping; |
diff --git a/fs/logfs/super.c b/fs/logfs/super.c index c66beab78dee..9d856c49afc5 100644 --- a/fs/logfs/super.c +++ b/fs/logfs/super.c | |||
@@ -277,7 +277,7 @@ static int logfs_recover_sb(struct super_block *sb) | |||
277 | } | 277 | } |
278 | if (valid0 && valid1 && ds_cmp(ds0, ds1)) { | 278 | if (valid0 && valid1 && ds_cmp(ds0, ds1)) { |
279 | printk(KERN_INFO"Superblocks don't match - fixing.\n"); | 279 | printk(KERN_INFO"Superblocks don't match - fixing.\n"); |
280 | return write_one_sb(sb, super->s_devops->find_last_sb); | 280 | return logfs_write_sb(sb); |
281 | } | 281 | } |
282 | /* If neither is valid now, something's wrong. Didn't we properly | 282 | /* If neither is valid now, something's wrong. Didn't we properly |
283 | * check them before?!? */ | 283 | * check them before?!? */ |
@@ -289,6 +289,10 @@ static int logfs_make_writeable(struct super_block *sb) | |||
289 | { | 289 | { |
290 | int err; | 290 | int err; |
291 | 291 | ||
292 | err = logfs_open_segfile(sb); | ||
293 | if (err) | ||
294 | return err; | ||
295 | |||
292 | /* Repair any broken superblock copies */ | 296 | /* Repair any broken superblock copies */ |
293 | err = logfs_recover_sb(sb); | 297 | err = logfs_recover_sb(sb); |
294 | if (err) | 298 | if (err) |
@@ -299,10 +303,6 @@ static int logfs_make_writeable(struct super_block *sb) | |||
299 | if (err) | 303 | if (err) |
300 | return err; | 304 | return err; |
301 | 305 | ||
302 | err = logfs_open_segfile(sb); | ||
303 | if (err) | ||
304 | return err; | ||
305 | |||
306 | /* Do one GC pass before any data gets dirtied */ | 306 | /* Do one GC pass before any data gets dirtied */ |
307 | logfs_gc_pass(sb); | 307 | logfs_gc_pass(sb); |
308 | 308 | ||
@@ -328,7 +328,7 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt) | |||
328 | 328 | ||
329 | sb->s_root = d_alloc_root(rootdir); | 329 | sb->s_root = d_alloc_root(rootdir); |
330 | if (!sb->s_root) | 330 | if (!sb->s_root) |
331 | goto fail; | 331 | goto fail2; |
332 | 332 | ||
333 | super->s_erase_page = alloc_pages(GFP_KERNEL, 0); | 333 | super->s_erase_page = alloc_pages(GFP_KERNEL, 0); |
334 | if (!super->s_erase_page) | 334 | if (!super->s_erase_page) |
@@ -572,8 +572,7 @@ int logfs_get_sb_device(struct file_system_type *type, int flags, | |||
572 | return 0; | 572 | return 0; |
573 | 573 | ||
574 | err1: | 574 | err1: |
575 | up_write(&sb->s_umount); | 575 | deactivate_locked_super(sb); |
576 | deactivate_super(sb); | ||
577 | return err; | 576 | return err; |
578 | err0: | 577 | err0: |
579 | kfree(super); | 578 | kfree(super); |
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 0501974bedd0..8ccf0f8c9cc8 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c | |||
@@ -30,6 +30,8 @@ | |||
30 | #include "alloc.h" | 30 | #include "alloc.h" |
31 | #include "dlmglue.h" | 31 | #include "dlmglue.h" |
32 | #include "file.h" | 32 | #include "file.h" |
33 | #include "inode.h" | ||
34 | #include "journal.h" | ||
33 | #include "ocfs2_fs.h" | 35 | #include "ocfs2_fs.h" |
34 | 36 | ||
35 | #include "xattr.h" | 37 | #include "xattr.h" |
@@ -166,6 +168,60 @@ static struct posix_acl *ocfs2_get_acl(struct inode *inode, int type) | |||
166 | } | 168 | } |
167 | 169 | ||
168 | /* | 170 | /* |
171 | * Helper function to set i_mode in memory and disk. Some call paths | ||
172 | * will not have di_bh or a journal handle to pass, in which case it | ||
173 | * will create it's own. | ||
174 | */ | ||
175 | static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh, | ||
176 | handle_t *handle, umode_t new_mode) | ||
177 | { | ||
178 | int ret, commit_handle = 0; | ||
179 | struct ocfs2_dinode *di; | ||
180 | |||
181 | if (di_bh == NULL) { | ||
182 | ret = ocfs2_read_inode_block(inode, &di_bh); | ||
183 | if (ret) { | ||
184 | mlog_errno(ret); | ||
185 | goto out; | ||
186 | } | ||
187 | } else | ||
188 | get_bh(di_bh); | ||
189 | |||
190 | if (handle == NULL) { | ||
191 | handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb), | ||
192 | OCFS2_INODE_UPDATE_CREDITS); | ||
193 | if (IS_ERR(handle)) { | ||
194 | ret = PTR_ERR(handle); | ||
195 | mlog_errno(ret); | ||
196 | goto out_brelse; | ||
197 | } | ||
198 | |||
199 | commit_handle = 1; | ||
200 | } | ||
201 | |||
202 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
203 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, | ||
204 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
205 | if (ret) { | ||
206 | mlog_errno(ret); | ||
207 | goto out_commit; | ||
208 | } | ||
209 | |||
210 | inode->i_mode = new_mode; | ||
211 | di->i_mode = cpu_to_le16(inode->i_mode); | ||
212 | |||
213 | ocfs2_journal_dirty(handle, di_bh); | ||
214 | |||
215 | out_commit: | ||
216 | if (commit_handle) | ||
217 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | ||
218 | out_brelse: | ||
219 | brelse(di_bh); | ||
220 | out: | ||
221 | return ret; | ||
222 | } | ||
223 | |||
224 | /* | ||
169 | * Set the access or default ACL of an inode. | 225 | * Set the access or default ACL of an inode. |
170 | */ | 226 | */ |
171 | static int ocfs2_set_acl(handle_t *handle, | 227 | static int ocfs2_set_acl(handle_t *handle, |
@@ -193,9 +249,14 @@ static int ocfs2_set_acl(handle_t *handle, | |||
193 | if (ret < 0) | 249 | if (ret < 0) |
194 | return ret; | 250 | return ret; |
195 | else { | 251 | else { |
196 | inode->i_mode = mode; | ||
197 | if (ret == 0) | 252 | if (ret == 0) |
198 | acl = NULL; | 253 | acl = NULL; |
254 | |||
255 | ret = ocfs2_acl_set_mode(inode, di_bh, | ||
256 | handle, mode); | ||
257 | if (ret) | ||
258 | return ret; | ||
259 | |||
199 | } | 260 | } |
200 | } | 261 | } |
201 | break; | 262 | break; |
@@ -283,6 +344,7 @@ int ocfs2_init_acl(handle_t *handle, | |||
283 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 344 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
284 | struct posix_acl *acl = NULL; | 345 | struct posix_acl *acl = NULL; |
285 | int ret = 0; | 346 | int ret = 0; |
347 | mode_t mode; | ||
286 | 348 | ||
287 | if (!S_ISLNK(inode->i_mode)) { | 349 | if (!S_ISLNK(inode->i_mode)) { |
288 | if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { | 350 | if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { |
@@ -291,12 +353,17 @@ int ocfs2_init_acl(handle_t *handle, | |||
291 | if (IS_ERR(acl)) | 353 | if (IS_ERR(acl)) |
292 | return PTR_ERR(acl); | 354 | return PTR_ERR(acl); |
293 | } | 355 | } |
294 | if (!acl) | 356 | if (!acl) { |
295 | inode->i_mode &= ~current_umask(); | 357 | mode = inode->i_mode & ~current_umask(); |
358 | ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode); | ||
359 | if (ret) { | ||
360 | mlog_errno(ret); | ||
361 | goto cleanup; | ||
362 | } | ||
363 | } | ||
296 | } | 364 | } |
297 | if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) { | 365 | if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) { |
298 | struct posix_acl *clone; | 366 | struct posix_acl *clone; |
299 | mode_t mode; | ||
300 | 367 | ||
301 | if (S_ISDIR(inode->i_mode)) { | 368 | if (S_ISDIR(inode->i_mode)) { |
302 | ret = ocfs2_set_acl(handle, inode, di_bh, | 369 | ret = ocfs2_set_acl(handle, inode, di_bh, |
@@ -313,7 +380,7 @@ int ocfs2_init_acl(handle_t *handle, | |||
313 | mode = inode->i_mode; | 380 | mode = inode->i_mode; |
314 | ret = posix_acl_create_masq(clone, &mode); | 381 | ret = posix_acl_create_masq(clone, &mode); |
315 | if (ret >= 0) { | 382 | if (ret >= 0) { |
316 | inode->i_mode = mode; | 383 | ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode); |
317 | if (ret > 0) { | 384 | if (ret > 0) { |
318 | ret = ocfs2_set_acl(handle, inode, | 385 | ret = ocfs2_set_acl(handle, inode, |
319 | di_bh, ACL_TYPE_ACCESS, | 386 | di_bh, ACL_TYPE_ACCESS, |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index a659606dcb95..9289b4357d27 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -1875,7 +1875,6 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data, | |||
1875 | ok: | 1875 | ok: |
1876 | spin_unlock(&res->spinlock); | 1876 | spin_unlock(&res->spinlock); |
1877 | } | 1877 | } |
1878 | spin_unlock(&dlm->spinlock); | ||
1879 | 1878 | ||
1880 | // mlog(0, "woo! got an assert_master from node %u!\n", | 1879 | // mlog(0, "woo! got an assert_master from node %u!\n", |
1881 | // assert->node_idx); | 1880 | // assert->node_idx); |
@@ -1926,7 +1925,6 @@ ok: | |||
1926 | /* master is known, detach if not already detached. | 1925 | /* master is known, detach if not already detached. |
1927 | * ensures that only one assert_master call will happen | 1926 | * ensures that only one assert_master call will happen |
1928 | * on this mle. */ | 1927 | * on this mle. */ |
1929 | spin_lock(&dlm->spinlock); | ||
1930 | spin_lock(&dlm->master_lock); | 1928 | spin_lock(&dlm->master_lock); |
1931 | 1929 | ||
1932 | rr = atomic_read(&mle->mle_refs.refcount); | 1930 | rr = atomic_read(&mle->mle_refs.refcount); |
@@ -1959,7 +1957,6 @@ ok: | |||
1959 | __dlm_put_mle(mle); | 1957 | __dlm_put_mle(mle); |
1960 | } | 1958 | } |
1961 | spin_unlock(&dlm->master_lock); | 1959 | spin_unlock(&dlm->master_lock); |
1962 | spin_unlock(&dlm->spinlock); | ||
1963 | } else if (res) { | 1960 | } else if (res) { |
1964 | if (res->owner != assert->node_idx) { | 1961 | if (res->owner != assert->node_idx) { |
1965 | mlog(0, "assert_master from %u, but current " | 1962 | mlog(0, "assert_master from %u, but current " |
@@ -1967,6 +1964,7 @@ ok: | |||
1967 | res->owner, namelen, name); | 1964 | res->owner, namelen, name); |
1968 | } | 1965 | } |
1969 | } | 1966 | } |
1967 | spin_unlock(&dlm->spinlock); | ||
1970 | 1968 | ||
1971 | done: | 1969 | done: |
1972 | ret = 0; | 1970 | ret = 0; |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 278a223aae14..ab207901d32a 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
@@ -891,6 +891,21 @@ static int ocfs2_query_inode_wipe(struct inode *inode, | |||
891 | /* Do some basic inode verification... */ | 891 | /* Do some basic inode verification... */ |
892 | di = (struct ocfs2_dinode *) di_bh->b_data; | 892 | di = (struct ocfs2_dinode *) di_bh->b_data; |
893 | if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) { | 893 | if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) { |
894 | /* | ||
895 | * Inodes in the orphan dir must have ORPHANED_FL. The only | ||
896 | * inodes that come back out of the orphan dir are reflink | ||
897 | * targets. A reflink target may be moved out of the orphan | ||
898 | * dir between the time we scan the directory and the time we | ||
899 | * process it. This would lead to HAS_REFCOUNT_FL being set but | ||
900 | * ORPHANED_FL not. | ||
901 | */ | ||
902 | if (di->i_dyn_features & cpu_to_le16(OCFS2_HAS_REFCOUNT_FL)) { | ||
903 | mlog(0, "Reflinked inode %llu is no longer orphaned. " | ||
904 | "it shouldn't be deleted\n", | ||
905 | (unsigned long long)oi->ip_blkno); | ||
906 | goto bail; | ||
907 | } | ||
908 | |||
894 | /* for lack of a better error? */ | 909 | /* for lack of a better error? */ |
895 | status = -EEXIST; | 910 | status = -EEXIST; |
896 | mlog(ML_ERROR, | 911 | mlog(ML_ERROR, |
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index ca992d91f511..c983715d8d8c 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c | |||
@@ -872,8 +872,10 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, | |||
872 | (unsigned long long)la_start_blk, | 872 | (unsigned long long)la_start_blk, |
873 | (unsigned long long)blkno); | 873 | (unsigned long long)blkno); |
874 | 874 | ||
875 | status = ocfs2_free_clusters(handle, main_bm_inode, | 875 | status = ocfs2_release_clusters(handle, |
876 | main_bm_bh, blkno, count); | 876 | main_bm_inode, |
877 | main_bm_bh, blkno, | ||
878 | count); | ||
877 | if (status < 0) { | 879 | if (status < 0) { |
878 | mlog_errno(status); | 880 | mlog_errno(status); |
879 | goto bail; | 881 | goto bail; |
@@ -984,8 +986,7 @@ static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, | |||
984 | } | 986 | } |
985 | 987 | ||
986 | retry_enospc: | 988 | retry_enospc: |
987 | (*ac)->ac_bits_wanted = osb->local_alloc_bits; | 989 | (*ac)->ac_bits_wanted = osb->local_alloc_default_bits; |
988 | |||
989 | status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); | 990 | status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); |
990 | if (status == -ENOSPC) { | 991 | if (status == -ENOSPC) { |
991 | if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == | 992 | if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == |
@@ -1061,6 +1062,7 @@ retry_enospc: | |||
1061 | OCFS2_LA_DISABLED) | 1062 | OCFS2_LA_DISABLED) |
1062 | goto bail; | 1063 | goto bail; |
1063 | 1064 | ||
1065 | ac->ac_bits_wanted = osb->local_alloc_default_bits; | ||
1064 | status = ocfs2_claim_clusters(osb, handle, ac, | 1066 | status = ocfs2_claim_clusters(osb, handle, ac, |
1065 | osb->local_alloc_bits, | 1067 | osb->local_alloc_bits, |
1066 | &cluster_off, | 1068 | &cluster_off, |
diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c index 544ac6245175..b5cb3ede9408 100644 --- a/fs/ocfs2/locks.c +++ b/fs/ocfs2/locks.c | |||
@@ -133,7 +133,7 @@ int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl) | |||
133 | 133 | ||
134 | if (!(fl->fl_flags & FL_POSIX)) | 134 | if (!(fl->fl_flags & FL_POSIX)) |
135 | return -ENOLCK; | 135 | return -ENOLCK; |
136 | if (__mandatory_lock(inode)) | 136 | if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) |
137 | return -ENOLCK; | 137 | return -ENOLCK; |
138 | 138 | ||
139 | return ocfs2_plock(osb->cconn, OCFS2_I(inode)->ip_blkno, file, cmd, fl); | 139 | return ocfs2_plock(osb->cconn, OCFS2_I(inode)->ip_blkno, file, cmd, fl); |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index d9cd4e373a53..b1eb50ae4097 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -84,7 +84,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, | |||
84 | static int ocfs2_orphan_add(struct ocfs2_super *osb, | 84 | static int ocfs2_orphan_add(struct ocfs2_super *osb, |
85 | handle_t *handle, | 85 | handle_t *handle, |
86 | struct inode *inode, | 86 | struct inode *inode, |
87 | struct ocfs2_dinode *fe, | 87 | struct buffer_head *fe_bh, |
88 | char *name, | 88 | char *name, |
89 | struct ocfs2_dir_lookup_result *lookup, | 89 | struct ocfs2_dir_lookup_result *lookup, |
90 | struct inode *orphan_dir_inode); | 90 | struct inode *orphan_dir_inode); |
@@ -879,7 +879,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
879 | fe = (struct ocfs2_dinode *) fe_bh->b_data; | 879 | fe = (struct ocfs2_dinode *) fe_bh->b_data; |
880 | 880 | ||
881 | if (inode_is_unlinkable(inode)) { | 881 | if (inode_is_unlinkable(inode)) { |
882 | status = ocfs2_orphan_add(osb, handle, inode, fe, orphan_name, | 882 | status = ocfs2_orphan_add(osb, handle, inode, fe_bh, orphan_name, |
883 | &orphan_insert, orphan_dir); | 883 | &orphan_insert, orphan_dir); |
884 | if (status < 0) { | 884 | if (status < 0) { |
885 | mlog_errno(status); | 885 | mlog_errno(status); |
@@ -1300,7 +1300,7 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1300 | if (S_ISDIR(new_inode->i_mode) || | 1300 | if (S_ISDIR(new_inode->i_mode) || |
1301 | (ocfs2_read_links_count(newfe) == 1)) { | 1301 | (ocfs2_read_links_count(newfe) == 1)) { |
1302 | status = ocfs2_orphan_add(osb, handle, new_inode, | 1302 | status = ocfs2_orphan_add(osb, handle, new_inode, |
1303 | newfe, orphan_name, | 1303 | newfe_bh, orphan_name, |
1304 | &orphan_insert, orphan_dir); | 1304 | &orphan_insert, orphan_dir); |
1305 | if (status < 0) { | 1305 | if (status < 0) { |
1306 | mlog_errno(status); | 1306 | mlog_errno(status); |
@@ -1911,7 +1911,7 @@ leave: | |||
1911 | static int ocfs2_orphan_add(struct ocfs2_super *osb, | 1911 | static int ocfs2_orphan_add(struct ocfs2_super *osb, |
1912 | handle_t *handle, | 1912 | handle_t *handle, |
1913 | struct inode *inode, | 1913 | struct inode *inode, |
1914 | struct ocfs2_dinode *fe, | 1914 | struct buffer_head *fe_bh, |
1915 | char *name, | 1915 | char *name, |
1916 | struct ocfs2_dir_lookup_result *lookup, | 1916 | struct ocfs2_dir_lookup_result *lookup, |
1917 | struct inode *orphan_dir_inode) | 1917 | struct inode *orphan_dir_inode) |
@@ -1919,6 +1919,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
1919 | struct buffer_head *orphan_dir_bh = NULL; | 1919 | struct buffer_head *orphan_dir_bh = NULL; |
1920 | int status = 0; | 1920 | int status = 0; |
1921 | struct ocfs2_dinode *orphan_fe; | 1921 | struct ocfs2_dinode *orphan_fe; |
1922 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; | ||
1922 | 1923 | ||
1923 | mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); | 1924 | mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); |
1924 | 1925 | ||
@@ -1959,6 +1960,21 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
1959 | goto leave; | 1960 | goto leave; |
1960 | } | 1961 | } |
1961 | 1962 | ||
1963 | /* | ||
1964 | * We're going to journal the change of i_flags and i_orphaned_slot. | ||
1965 | * It's safe anyway, though some callers may duplicate the journaling. | ||
1966 | * Journaling within the func just make the logic look more | ||
1967 | * straightforward. | ||
1968 | */ | ||
1969 | status = ocfs2_journal_access_di(handle, | ||
1970 | INODE_CACHE(inode), | ||
1971 | fe_bh, | ||
1972 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1973 | if (status < 0) { | ||
1974 | mlog_errno(status); | ||
1975 | goto leave; | ||
1976 | } | ||
1977 | |||
1962 | le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL); | 1978 | le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL); |
1963 | 1979 | ||
1964 | /* Record which orphan dir our inode now resides | 1980 | /* Record which orphan dir our inode now resides |
@@ -1966,6 +1982,8 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
1966 | * dir to lock. */ | 1982 | * dir to lock. */ |
1967 | fe->i_orphaned_slot = cpu_to_le16(osb->slot_num); | 1983 | fe->i_orphaned_slot = cpu_to_le16(osb->slot_num); |
1968 | 1984 | ||
1985 | ocfs2_journal_dirty(handle, fe_bh); | ||
1986 | |||
1969 | mlog(0, "Inode %llu orphaned in slot %d\n", | 1987 | mlog(0, "Inode %llu orphaned in slot %d\n", |
1970 | (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num); | 1988 | (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num); |
1971 | 1989 | ||
@@ -2123,7 +2141,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, | |||
2123 | } | 2141 | } |
2124 | 2142 | ||
2125 | di = (struct ocfs2_dinode *)new_di_bh->b_data; | 2143 | di = (struct ocfs2_dinode *)new_di_bh->b_data; |
2126 | status = ocfs2_orphan_add(osb, handle, inode, di, orphan_name, | 2144 | status = ocfs2_orphan_add(osb, handle, inode, new_di_bh, orphan_name, |
2127 | &orphan_insert, orphan_dir); | 2145 | &orphan_insert, orphan_dir); |
2128 | if (status < 0) { | 2146 | if (status < 0) { |
2129 | mlog_errno(status); | 2147 | mlog_errno(status); |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 1238b491db90..adf5e2ebc2c4 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -763,8 +763,18 @@ static inline unsigned int ocfs2_megabytes_to_clusters(struct super_block *sb, | |||
763 | return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits); | 763 | return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits); |
764 | } | 764 | } |
765 | 765 | ||
766 | #define ocfs2_set_bit ext2_set_bit | 766 | static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap) |
767 | #define ocfs2_clear_bit ext2_clear_bit | 767 | { |
768 | ext2_set_bit(bit, bitmap); | ||
769 | } | ||
770 | #define ocfs2_set_bit(bit, addr) _ocfs2_set_bit((bit), (unsigned long *)(addr)) | ||
771 | |||
772 | static inline void _ocfs2_clear_bit(unsigned int bit, unsigned long *bitmap) | ||
773 | { | ||
774 | ext2_clear_bit(bit, bitmap); | ||
775 | } | ||
776 | #define ocfs2_clear_bit(bit, addr) _ocfs2_clear_bit((bit), (unsigned long *)(addr)) | ||
777 | |||
768 | #define ocfs2_test_bit ext2_test_bit | 778 | #define ocfs2_test_bit ext2_test_bit |
769 | #define ocfs2_find_next_zero_bit ext2_find_next_zero_bit | 779 | #define ocfs2_find_next_zero_bit ext2_find_next_zero_bit |
770 | #define ocfs2_find_next_bit ext2_find_next_bit | 780 | #define ocfs2_find_next_bit ext2_find_next_bit |
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 9e96921dffda..29405f2ff616 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
@@ -4075,6 +4075,7 @@ static int ocfs2_complete_reflink(struct inode *s_inode, | |||
4075 | OCFS2_I(t_inode)->ip_dyn_features = OCFS2_I(s_inode)->ip_dyn_features; | 4075 | OCFS2_I(t_inode)->ip_dyn_features = OCFS2_I(s_inode)->ip_dyn_features; |
4076 | spin_unlock(&OCFS2_I(t_inode)->ip_lock); | 4076 | spin_unlock(&OCFS2_I(t_inode)->ip_lock); |
4077 | i_size_write(t_inode, size); | 4077 | i_size_write(t_inode, size); |
4078 | t_inode->i_blocks = s_inode->i_blocks; | ||
4078 | 4079 | ||
4079 | di->i_xattr_inline_size = s_di->i_xattr_inline_size; | 4080 | di->i_xattr_inline_size = s_di->i_xattr_inline_size; |
4080 | di->i_clusters = s_di->i_clusters; | 4081 | di->i_clusters = s_di->i_clusters; |
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index c3c60bc3e072..19ba00f28547 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -95,13 +95,6 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle, | |||
95 | struct buffer_head *group_bh, | 95 | struct buffer_head *group_bh, |
96 | unsigned int bit_off, | 96 | unsigned int bit_off, |
97 | unsigned int num_bits); | 97 | unsigned int num_bits); |
98 | static inline int ocfs2_block_group_clear_bits(handle_t *handle, | ||
99 | struct inode *alloc_inode, | ||
100 | struct ocfs2_group_desc *bg, | ||
101 | struct buffer_head *group_bh, | ||
102 | unsigned int bit_off, | ||
103 | unsigned int num_bits); | ||
104 | |||
105 | static int ocfs2_relink_block_group(handle_t *handle, | 98 | static int ocfs2_relink_block_group(handle_t *handle, |
106 | struct inode *alloc_inode, | 99 | struct inode *alloc_inode, |
107 | struct buffer_head *fe_bh, | 100 | struct buffer_head *fe_bh, |
@@ -152,7 +145,7 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) | |||
152 | 145 | ||
153 | #define do_error(fmt, ...) \ | 146 | #define do_error(fmt, ...) \ |
154 | do{ \ | 147 | do{ \ |
155 | if (clean_error) \ | 148 | if (resize) \ |
156 | mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__); \ | 149 | mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__); \ |
157 | else \ | 150 | else \ |
158 | ocfs2_error(sb, fmt, ##__VA_ARGS__); \ | 151 | ocfs2_error(sb, fmt, ##__VA_ARGS__); \ |
@@ -160,7 +153,7 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) | |||
160 | 153 | ||
161 | static int ocfs2_validate_gd_self(struct super_block *sb, | 154 | static int ocfs2_validate_gd_self(struct super_block *sb, |
162 | struct buffer_head *bh, | 155 | struct buffer_head *bh, |
163 | int clean_error) | 156 | int resize) |
164 | { | 157 | { |
165 | struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; | 158 | struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; |
166 | 159 | ||
@@ -211,7 +204,7 @@ static int ocfs2_validate_gd_self(struct super_block *sb, | |||
211 | static int ocfs2_validate_gd_parent(struct super_block *sb, | 204 | static int ocfs2_validate_gd_parent(struct super_block *sb, |
212 | struct ocfs2_dinode *di, | 205 | struct ocfs2_dinode *di, |
213 | struct buffer_head *bh, | 206 | struct buffer_head *bh, |
214 | int clean_error) | 207 | int resize) |
215 | { | 208 | { |
216 | unsigned int max_bits; | 209 | unsigned int max_bits; |
217 | struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; | 210 | struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; |
@@ -233,8 +226,11 @@ static int ocfs2_validate_gd_parent(struct super_block *sb, | |||
233 | return -EINVAL; | 226 | return -EINVAL; |
234 | } | 227 | } |
235 | 228 | ||
236 | if (le16_to_cpu(gd->bg_chain) >= | 229 | /* In resize, we may meet the case bg_chain == cl_next_free_rec. */ |
237 | le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) { | 230 | if ((le16_to_cpu(gd->bg_chain) > |
231 | le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) || | ||
232 | ((le16_to_cpu(gd->bg_chain) == | ||
233 | le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) && !resize)) { | ||
238 | do_error("Group descriptor #%llu has bad chain %u", | 234 | do_error("Group descriptor #%llu has bad chain %u", |
239 | (unsigned long long)bh->b_blocknr, | 235 | (unsigned long long)bh->b_blocknr, |
240 | le16_to_cpu(gd->bg_chain)); | 236 | le16_to_cpu(gd->bg_chain)); |
@@ -1975,18 +1971,18 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb, | |||
1975 | bits_wanted, cluster_start, num_clusters); | 1971 | bits_wanted, cluster_start, num_clusters); |
1976 | } | 1972 | } |
1977 | 1973 | ||
1978 | static inline int ocfs2_block_group_clear_bits(handle_t *handle, | 1974 | static int ocfs2_block_group_clear_bits(handle_t *handle, |
1979 | struct inode *alloc_inode, | 1975 | struct inode *alloc_inode, |
1980 | struct ocfs2_group_desc *bg, | 1976 | struct ocfs2_group_desc *bg, |
1981 | struct buffer_head *group_bh, | 1977 | struct buffer_head *group_bh, |
1982 | unsigned int bit_off, | 1978 | unsigned int bit_off, |
1983 | unsigned int num_bits) | 1979 | unsigned int num_bits, |
1980 | void (*undo_fn)(unsigned int bit, | ||
1981 | unsigned long *bmap)) | ||
1984 | { | 1982 | { |
1985 | int status; | 1983 | int status; |
1986 | unsigned int tmp; | 1984 | unsigned int tmp; |
1987 | int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; | ||
1988 | struct ocfs2_group_desc *undo_bg = NULL; | 1985 | struct ocfs2_group_desc *undo_bg = NULL; |
1989 | int cluster_bitmap = 0; | ||
1990 | 1986 | ||
1991 | mlog_entry_void(); | 1987 | mlog_entry_void(); |
1992 | 1988 | ||
@@ -1996,20 +1992,18 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle, | |||
1996 | 1992 | ||
1997 | mlog(0, "off = %u, num = %u\n", bit_off, num_bits); | 1993 | mlog(0, "off = %u, num = %u\n", bit_off, num_bits); |
1998 | 1994 | ||
1999 | if (ocfs2_is_cluster_bitmap(alloc_inode)) | 1995 | BUG_ON(undo_fn && !ocfs2_is_cluster_bitmap(alloc_inode)); |
2000 | journal_type = OCFS2_JOURNAL_ACCESS_UNDO; | ||
2001 | |||
2002 | status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), | 1996 | status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), |
2003 | group_bh, journal_type); | 1997 | group_bh, |
1998 | undo_fn ? | ||
1999 | OCFS2_JOURNAL_ACCESS_UNDO : | ||
2000 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
2004 | if (status < 0) { | 2001 | if (status < 0) { |
2005 | mlog_errno(status); | 2002 | mlog_errno(status); |
2006 | goto bail; | 2003 | goto bail; |
2007 | } | 2004 | } |
2008 | 2005 | ||
2009 | if (ocfs2_is_cluster_bitmap(alloc_inode)) | 2006 | if (undo_fn) { |
2010 | cluster_bitmap = 1; | ||
2011 | |||
2012 | if (cluster_bitmap) { | ||
2013 | jbd_lock_bh_state(group_bh); | 2007 | jbd_lock_bh_state(group_bh); |
2014 | undo_bg = (struct ocfs2_group_desc *) | 2008 | undo_bg = (struct ocfs2_group_desc *) |
2015 | bh2jh(group_bh)->b_committed_data; | 2009 | bh2jh(group_bh)->b_committed_data; |
@@ -2020,13 +2014,13 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle, | |||
2020 | while(tmp--) { | 2014 | while(tmp--) { |
2021 | ocfs2_clear_bit((bit_off + tmp), | 2015 | ocfs2_clear_bit((bit_off + tmp), |
2022 | (unsigned long *) bg->bg_bitmap); | 2016 | (unsigned long *) bg->bg_bitmap); |
2023 | if (cluster_bitmap) | 2017 | if (undo_fn) |
2024 | ocfs2_set_bit(bit_off + tmp, | 2018 | undo_fn(bit_off + tmp, |
2025 | (unsigned long *) undo_bg->bg_bitmap); | 2019 | (unsigned long *) undo_bg->bg_bitmap); |
2026 | } | 2020 | } |
2027 | le16_add_cpu(&bg->bg_free_bits_count, num_bits); | 2021 | le16_add_cpu(&bg->bg_free_bits_count, num_bits); |
2028 | 2022 | ||
2029 | if (cluster_bitmap) | 2023 | if (undo_fn) |
2030 | jbd_unlock_bh_state(group_bh); | 2024 | jbd_unlock_bh_state(group_bh); |
2031 | 2025 | ||
2032 | status = ocfs2_journal_dirty(handle, group_bh); | 2026 | status = ocfs2_journal_dirty(handle, group_bh); |
@@ -2039,12 +2033,14 @@ bail: | |||
2039 | /* | 2033 | /* |
2040 | * expects the suballoc inode to already be locked. | 2034 | * expects the suballoc inode to already be locked. |
2041 | */ | 2035 | */ |
2042 | int ocfs2_free_suballoc_bits(handle_t *handle, | 2036 | static int _ocfs2_free_suballoc_bits(handle_t *handle, |
2043 | struct inode *alloc_inode, | 2037 | struct inode *alloc_inode, |
2044 | struct buffer_head *alloc_bh, | 2038 | struct buffer_head *alloc_bh, |
2045 | unsigned int start_bit, | 2039 | unsigned int start_bit, |
2046 | u64 bg_blkno, | 2040 | u64 bg_blkno, |
2047 | unsigned int count) | 2041 | unsigned int count, |
2042 | void (*undo_fn)(unsigned int bit, | ||
2043 | unsigned long *bitmap)) | ||
2048 | { | 2044 | { |
2049 | int status = 0; | 2045 | int status = 0; |
2050 | u32 tmp_used; | 2046 | u32 tmp_used; |
@@ -2079,7 +2075,7 @@ int ocfs2_free_suballoc_bits(handle_t *handle, | |||
2079 | 2075 | ||
2080 | status = ocfs2_block_group_clear_bits(handle, alloc_inode, | 2076 | status = ocfs2_block_group_clear_bits(handle, alloc_inode, |
2081 | group, group_bh, | 2077 | group, group_bh, |
2082 | start_bit, count); | 2078 | start_bit, count, undo_fn); |
2083 | if (status < 0) { | 2079 | if (status < 0) { |
2084 | mlog_errno(status); | 2080 | mlog_errno(status); |
2085 | goto bail; | 2081 | goto bail; |
@@ -2110,6 +2106,17 @@ bail: | |||
2110 | return status; | 2106 | return status; |
2111 | } | 2107 | } |
2112 | 2108 | ||
2109 | int ocfs2_free_suballoc_bits(handle_t *handle, | ||
2110 | struct inode *alloc_inode, | ||
2111 | struct buffer_head *alloc_bh, | ||
2112 | unsigned int start_bit, | ||
2113 | u64 bg_blkno, | ||
2114 | unsigned int count) | ||
2115 | { | ||
2116 | return _ocfs2_free_suballoc_bits(handle, alloc_inode, alloc_bh, | ||
2117 | start_bit, bg_blkno, count, NULL); | ||
2118 | } | ||
2119 | |||
2113 | int ocfs2_free_dinode(handle_t *handle, | 2120 | int ocfs2_free_dinode(handle_t *handle, |
2114 | struct inode *inode_alloc_inode, | 2121 | struct inode *inode_alloc_inode, |
2115 | struct buffer_head *inode_alloc_bh, | 2122 | struct buffer_head *inode_alloc_bh, |
@@ -2123,11 +2130,13 @@ int ocfs2_free_dinode(handle_t *handle, | |||
2123 | inode_alloc_bh, bit, bg_blkno, 1); | 2130 | inode_alloc_bh, bit, bg_blkno, 1); |
2124 | } | 2131 | } |
2125 | 2132 | ||
2126 | int ocfs2_free_clusters(handle_t *handle, | 2133 | static int _ocfs2_free_clusters(handle_t *handle, |
2127 | struct inode *bitmap_inode, | 2134 | struct inode *bitmap_inode, |
2128 | struct buffer_head *bitmap_bh, | 2135 | struct buffer_head *bitmap_bh, |
2129 | u64 start_blk, | 2136 | u64 start_blk, |
2130 | unsigned int num_clusters) | 2137 | unsigned int num_clusters, |
2138 | void (*undo_fn)(unsigned int bit, | ||
2139 | unsigned long *bitmap)) | ||
2131 | { | 2140 | { |
2132 | int status; | 2141 | int status; |
2133 | u16 bg_start_bit; | 2142 | u16 bg_start_bit; |
@@ -2154,9 +2163,9 @@ int ocfs2_free_clusters(handle_t *handle, | |||
2154 | mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n", | 2163 | mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n", |
2155 | (unsigned long long)bg_blkno, bg_start_bit); | 2164 | (unsigned long long)bg_blkno, bg_start_bit); |
2156 | 2165 | ||
2157 | status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, | 2166 | status = _ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, |
2158 | bg_start_bit, bg_blkno, | 2167 | bg_start_bit, bg_blkno, |
2159 | num_clusters); | 2168 | num_clusters, undo_fn); |
2160 | if (status < 0) { | 2169 | if (status < 0) { |
2161 | mlog_errno(status); | 2170 | mlog_errno(status); |
2162 | goto out; | 2171 | goto out; |
@@ -2170,6 +2179,32 @@ out: | |||
2170 | return status; | 2179 | return status; |
2171 | } | 2180 | } |
2172 | 2181 | ||
2182 | int ocfs2_free_clusters(handle_t *handle, | ||
2183 | struct inode *bitmap_inode, | ||
2184 | struct buffer_head *bitmap_bh, | ||
2185 | u64 start_blk, | ||
2186 | unsigned int num_clusters) | ||
2187 | { | ||
2188 | return _ocfs2_free_clusters(handle, bitmap_inode, bitmap_bh, | ||
2189 | start_blk, num_clusters, | ||
2190 | _ocfs2_set_bit); | ||
2191 | } | ||
2192 | |||
2193 | /* | ||
2194 | * Give never-used clusters back to the global bitmap. We don't need | ||
2195 | * to protect these bits in the undo buffer. | ||
2196 | */ | ||
2197 | int ocfs2_release_clusters(handle_t *handle, | ||
2198 | struct inode *bitmap_inode, | ||
2199 | struct buffer_head *bitmap_bh, | ||
2200 | u64 start_blk, | ||
2201 | unsigned int num_clusters) | ||
2202 | { | ||
2203 | return _ocfs2_free_clusters(handle, bitmap_inode, bitmap_bh, | ||
2204 | start_blk, num_clusters, | ||
2205 | _ocfs2_clear_bit); | ||
2206 | } | ||
2207 | |||
2173 | static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg) | 2208 | static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg) |
2174 | { | 2209 | { |
2175 | printk("Block Group:\n"); | 2210 | printk("Block Group:\n"); |
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index fa60723c43e8..e0f46df357e6 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h | |||
@@ -127,6 +127,11 @@ int ocfs2_free_clusters(handle_t *handle, | |||
127 | struct buffer_head *bitmap_bh, | 127 | struct buffer_head *bitmap_bh, |
128 | u64 start_blk, | 128 | u64 start_blk, |
129 | unsigned int num_clusters); | 129 | unsigned int num_clusters); |
130 | int ocfs2_release_clusters(handle_t *handle, | ||
131 | struct inode *bitmap_inode, | ||
132 | struct buffer_head *bitmap_bh, | ||
133 | u64 start_blk, | ||
134 | unsigned int num_clusters); | ||
130 | 135 | ||
131 | static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit) | 136 | static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit) |
132 | { | 137 | { |
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index d1b0d386f6d1..3e7773089b96 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
@@ -1622,7 +1622,7 @@ static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc) | |||
1622 | /* Now tell xh->xh_entries about it */ | 1622 | /* Now tell xh->xh_entries about it */ |
1623 | for (i = 0; i < count; i++) { | 1623 | for (i = 0; i < count; i++) { |
1624 | offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); | 1624 | offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); |
1625 | if (offset < namevalue_offset) | 1625 | if (offset <= namevalue_offset) |
1626 | le16_add_cpu(&xh->xh_entries[i].xe_name_offset, | 1626 | le16_add_cpu(&xh->xh_entries[i].xe_name_offset, |
1627 | namevalue_size); | 1627 | namevalue_size); |
1628 | } | 1628 | } |
@@ -6528,13 +6528,11 @@ static int ocfs2_create_empty_xattr_block(struct inode *inode, | |||
6528 | int indexed) | 6528 | int indexed) |
6529 | { | 6529 | { |
6530 | int ret; | 6530 | int ret; |
6531 | struct ocfs2_alloc_context *meta_ac; | ||
6532 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 6531 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
6533 | struct ocfs2_xattr_set_ctxt ctxt = { | 6532 | struct ocfs2_xattr_set_ctxt ctxt; |
6534 | .meta_ac = meta_ac, | ||
6535 | }; | ||
6536 | 6533 | ||
6537 | ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac); | 6534 | memset(&ctxt, 0, sizeof(ctxt)); |
6535 | ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac); | ||
6538 | if (ret < 0) { | 6536 | if (ret < 0) { |
6539 | mlog_errno(ret); | 6537 | mlog_errno(ret); |
6540 | return ret; | 6538 | return ret; |
@@ -6556,7 +6554,7 @@ static int ocfs2_create_empty_xattr_block(struct inode *inode, | |||
6556 | 6554 | ||
6557 | ocfs2_commit_trans(osb, ctxt.handle); | 6555 | ocfs2_commit_trans(osb, ctxt.handle); |
6558 | out: | 6556 | out: |
6559 | ocfs2_free_alloc_context(meta_ac); | 6557 | ocfs2_free_alloc_context(ctxt.meta_ac); |
6560 | return ret; | 6558 | return ret; |
6561 | } | 6559 | } |
6562 | 6560 | ||
diff --git a/fs/proc/base.c b/fs/proc/base.c index a7310841c831..b1f6e62773d3 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -442,12 +442,13 @@ static const struct file_operations proc_lstats_operations = { | |||
442 | unsigned long badness(struct task_struct *p, unsigned long uptime); | 442 | unsigned long badness(struct task_struct *p, unsigned long uptime); |
443 | static int proc_oom_score(struct task_struct *task, char *buffer) | 443 | static int proc_oom_score(struct task_struct *task, char *buffer) |
444 | { | 444 | { |
445 | unsigned long points; | 445 | unsigned long points = 0; |
446 | struct timespec uptime; | 446 | struct timespec uptime; |
447 | 447 | ||
448 | do_posix_clock_monotonic_gettime(&uptime); | 448 | do_posix_clock_monotonic_gettime(&uptime); |
449 | read_lock(&tasklist_lock); | 449 | read_lock(&tasklist_lock); |
450 | points = badness(task->group_leader, uptime.tv_sec); | 450 | if (pid_alive(task)) |
451 | points = badness(task, uptime.tv_sec); | ||
451 | read_unlock(&tasklist_lock); | 452 | read_unlock(&tasklist_lock); |
452 | return sprintf(buffer, "%lu\n", points); | 453 | return sprintf(buffer, "%lu\n", points); |
453 | } | 454 | } |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 183f8ff5f400..096273984c3b 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -406,6 +406,7 @@ static int show_smap(struct seq_file *m, void *v) | |||
406 | 406 | ||
407 | memset(&mss, 0, sizeof mss); | 407 | memset(&mss, 0, sizeof mss); |
408 | mss.vma = vma; | 408 | mss.vma = vma; |
409 | /* mmap_sem is held in m_start */ | ||
409 | if (vma->vm_mm && !is_vm_hugetlb_page(vma)) | 410 | if (vma->vm_mm && !is_vm_hugetlb_page(vma)) |
410 | walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); | 411 | walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); |
411 | 412 | ||
@@ -552,7 +553,8 @@ const struct file_operations proc_clear_refs_operations = { | |||
552 | }; | 553 | }; |
553 | 554 | ||
554 | struct pagemapread { | 555 | struct pagemapread { |
555 | u64 __user *out, *end; | 556 | int pos, len; |
557 | u64 *buffer; | ||
556 | }; | 558 | }; |
557 | 559 | ||
558 | #define PM_ENTRY_BYTES sizeof(u64) | 560 | #define PM_ENTRY_BYTES sizeof(u64) |
@@ -575,10 +577,8 @@ struct pagemapread { | |||
575 | static int add_to_pagemap(unsigned long addr, u64 pfn, | 577 | static int add_to_pagemap(unsigned long addr, u64 pfn, |
576 | struct pagemapread *pm) | 578 | struct pagemapread *pm) |
577 | { | 579 | { |
578 | if (put_user(pfn, pm->out)) | 580 | pm->buffer[pm->pos++] = pfn; |
579 | return -EFAULT; | 581 | if (pm->pos >= pm->len) |
580 | pm->out++; | ||
581 | if (pm->out >= pm->end) | ||
582 | return PM_END_OF_BUFFER; | 582 | return PM_END_OF_BUFFER; |
583 | return 0; | 583 | return 0; |
584 | } | 584 | } |
@@ -720,21 +720,20 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long addr, | |||
720 | * determine which areas of memory are actually mapped and llseek to | 720 | * determine which areas of memory are actually mapped and llseek to |
721 | * skip over unmapped regions. | 721 | * skip over unmapped regions. |
722 | */ | 722 | */ |
723 | #define PAGEMAP_WALK_SIZE (PMD_SIZE) | ||
723 | static ssize_t pagemap_read(struct file *file, char __user *buf, | 724 | static ssize_t pagemap_read(struct file *file, char __user *buf, |
724 | size_t count, loff_t *ppos) | 725 | size_t count, loff_t *ppos) |
725 | { | 726 | { |
726 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); | 727 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); |
727 | struct page **pages, *page; | ||
728 | unsigned long uaddr, uend; | ||
729 | struct mm_struct *mm; | 728 | struct mm_struct *mm; |
730 | struct pagemapread pm; | 729 | struct pagemapread pm; |
731 | int pagecount; | ||
732 | int ret = -ESRCH; | 730 | int ret = -ESRCH; |
733 | struct mm_walk pagemap_walk = {}; | 731 | struct mm_walk pagemap_walk = {}; |
734 | unsigned long src; | 732 | unsigned long src; |
735 | unsigned long svpfn; | 733 | unsigned long svpfn; |
736 | unsigned long start_vaddr; | 734 | unsigned long start_vaddr; |
737 | unsigned long end_vaddr; | 735 | unsigned long end_vaddr; |
736 | int copied = 0; | ||
738 | 737 | ||
739 | if (!task) | 738 | if (!task) |
740 | goto out; | 739 | goto out; |
@@ -757,35 +756,12 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
757 | if (!mm) | 756 | if (!mm) |
758 | goto out_task; | 757 | goto out_task; |
759 | 758 | ||
760 | 759 | pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); | |
761 | uaddr = (unsigned long)buf & PAGE_MASK; | 760 | pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); |
762 | uend = (unsigned long)(buf + count); | ||
763 | pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE; | ||
764 | ret = 0; | ||
765 | if (pagecount == 0) | ||
766 | goto out_mm; | ||
767 | pages = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); | ||
768 | ret = -ENOMEM; | 761 | ret = -ENOMEM; |
769 | if (!pages) | 762 | if (!pm.buffer) |
770 | goto out_mm; | 763 | goto out_mm; |
771 | 764 | ||
772 | down_read(¤t->mm->mmap_sem); | ||
773 | ret = get_user_pages(current, current->mm, uaddr, pagecount, | ||
774 | 1, 0, pages, NULL); | ||
775 | up_read(¤t->mm->mmap_sem); | ||
776 | |||
777 | if (ret < 0) | ||
778 | goto out_free; | ||
779 | |||
780 | if (ret != pagecount) { | ||
781 | pagecount = ret; | ||
782 | ret = -EFAULT; | ||
783 | goto out_pages; | ||
784 | } | ||
785 | |||
786 | pm.out = (u64 __user *)buf; | ||
787 | pm.end = (u64 __user *)(buf + count); | ||
788 | |||
789 | pagemap_walk.pmd_entry = pagemap_pte_range; | 765 | pagemap_walk.pmd_entry = pagemap_pte_range; |
790 | pagemap_walk.pte_hole = pagemap_pte_hole; | 766 | pagemap_walk.pte_hole = pagemap_pte_hole; |
791 | pagemap_walk.hugetlb_entry = pagemap_hugetlb_range; | 767 | pagemap_walk.hugetlb_entry = pagemap_hugetlb_range; |
@@ -807,23 +783,36 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
807 | * user buffer is tracked in "pm", and the walk | 783 | * user buffer is tracked in "pm", and the walk |
808 | * will stop when we hit the end of the buffer. | 784 | * will stop when we hit the end of the buffer. |
809 | */ | 785 | */ |
810 | ret = walk_page_range(start_vaddr, end_vaddr, &pagemap_walk); | 786 | ret = 0; |
811 | if (ret == PM_END_OF_BUFFER) | 787 | while (count && (start_vaddr < end_vaddr)) { |
812 | ret = 0; | 788 | int len; |
813 | /* don't need mmap_sem for these, but this looks cleaner */ | 789 | unsigned long end; |
814 | *ppos += (char __user *)pm.out - buf; | 790 | |
815 | if (!ret) | 791 | pm.pos = 0; |
816 | ret = (char __user *)pm.out - buf; | 792 | end = start_vaddr + PAGEMAP_WALK_SIZE; |
817 | 793 | /* overflow ? */ | |
818 | out_pages: | 794 | if (end < start_vaddr || end > end_vaddr) |
819 | for (; pagecount; pagecount--) { | 795 | end = end_vaddr; |
820 | page = pages[pagecount-1]; | 796 | down_read(&mm->mmap_sem); |
821 | if (!PageReserved(page)) | 797 | ret = walk_page_range(start_vaddr, end, &pagemap_walk); |
822 | SetPageDirty(page); | 798 | up_read(&mm->mmap_sem); |
823 | page_cache_release(page); | 799 | start_vaddr = end; |
800 | |||
801 | len = min(count, PM_ENTRY_BYTES * pm.pos); | ||
802 | if (copy_to_user(buf, pm.buffer, len) < 0) { | ||
803 | ret = -EFAULT; | ||
804 | goto out_free; | ||
805 | } | ||
806 | copied += len; | ||
807 | buf += len; | ||
808 | count -= len; | ||
824 | } | 809 | } |
810 | *ppos += copied; | ||
811 | if (!ret || ret == PM_END_OF_BUFFER) | ||
812 | ret = copied; | ||
813 | |||
825 | out_free: | 814 | out_free: |
826 | kfree(pages); | 815 | kfree(pm.buffer); |
827 | out_mm: | 816 | out_mm: |
828 | mmput(mm); | 817 | mmput(mm); |
829 | out_task: | 818 | out_task: |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 04bf5d791bda..ab190511bc18 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -1618,10 +1618,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1618 | save_mount_options(s, data); | 1618 | save_mount_options(s, data); |
1619 | 1619 | ||
1620 | sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL); | 1620 | sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL); |
1621 | if (!sbi) { | 1621 | if (!sbi) |
1622 | errval = -ENOMEM; | 1622 | return -ENOMEM; |
1623 | goto error_alloc; | ||
1624 | } | ||
1625 | s->s_fs_info = sbi; | 1623 | s->s_fs_info = sbi; |
1626 | /* Set default values for options: non-aggressive tails, RO on errors */ | 1624 | /* Set default values for options: non-aggressive tails, RO on errors */ |
1627 | REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL); | 1625 | REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL); |
@@ -1878,12 +1876,12 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1878 | return (0); | 1876 | return (0); |
1879 | 1877 | ||
1880 | error: | 1878 | error: |
1881 | reiserfs_write_unlock(s); | ||
1882 | error_alloc: | ||
1883 | if (jinit_done) { /* kill the commit thread, free journal ram */ | 1879 | if (jinit_done) { /* kill the commit thread, free journal ram */ |
1884 | journal_release_error(NULL, s); | 1880 | journal_release_error(NULL, s); |
1885 | } | 1881 | } |
1886 | 1882 | ||
1883 | reiserfs_write_unlock(s); | ||
1884 | |||
1887 | reiserfs_free_bitmap_cache(s); | 1885 | reiserfs_free_bitmap_cache(s); |
1888 | if (SB_BUFFER_WITH_SB(s)) | 1886 | if (SB_BUFFER_WITH_SB(s)) |
1889 | brelse(SB_BUFFER_WITH_SB(s)); | 1887 | brelse(SB_BUFFER_WITH_SB(s)); |