aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/ceph/addr.c10
-rw-r--r--fs/ceph/auth_x.c53
-rw-r--r--fs/ceph/caps.c73
-rw-r--r--fs/ceph/dir.c4
-rw-r--r--fs/ceph/inode.c16
-rw-r--r--fs/ceph/mds_client.c43
-rw-r--r--fs/ceph/messenger.c19
-rw-r--r--fs/ceph/messenger.h1
-rw-r--r--fs/ceph/osd_client.c29
-rw-r--r--fs/ceph/osd_client.h2
-rw-r--r--fs/ceph/osdmap.c17
-rw-r--r--fs/ceph/snap.c6
-rw-r--r--fs/ext3/ialloc.c4
-rw-r--r--fs/ext3/inode.c2
-rw-r--r--fs/fat/namei_vfat.c6
-rw-r--r--fs/fscache/object.c6
-rw-r--r--fs/fscache/operation.c4
-rw-r--r--fs/logfs/dev_bdev.c9
-rw-r--r--fs/logfs/dir.c4
-rw-r--r--fs/logfs/journal.c7
-rw-r--r--fs/logfs/logfs.h1
-rw-r--r--fs/logfs/readwrite.c13
-rw-r--r--fs/logfs/segment.c54
-rw-r--r--fs/logfs/super.c15
-rw-r--r--fs/ocfs2/acl.c77
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c4
-rw-r--r--fs/ocfs2/inode.c15
-rw-r--r--fs/ocfs2/localalloc.c10
-rw-r--r--fs/ocfs2/locks.c2
-rw-r--r--fs/ocfs2/namei.c28
-rw-r--r--fs/ocfs2/ocfs2.h14
-rw-r--r--fs/ocfs2/refcounttree.c1
-rw-r--r--fs/ocfs2/suballoc.c129
-rw-r--r--fs/ocfs2/suballoc.h5
-rw-r--r--fs/ocfs2/xattr.c12
-rw-r--r--fs/proc/base.c5
-rw-r--r--fs/proc/task_mmu.c87
-rw-r--r--fs/reiserfs/super.c10
38 files changed, 529 insertions, 268 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 23bb0ceabe31..ce8ef6107727 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -919,6 +919,10 @@ static int context_is_writeable_or_written(struct inode *inode,
919/* 919/*
920 * We are only allowed to write into/dirty the page if the page is 920 * We are only allowed to write into/dirty the page if the page is
921 * clean, or already dirty within the same snap context. 921 * clean, or already dirty within the same snap context.
922 *
923 * called with page locked.
924 * return success with page locked,
925 * or any failure (incl -EAGAIN) with page unlocked.
922 */ 926 */
923static int ceph_update_writeable_page(struct file *file, 927static int ceph_update_writeable_page(struct file *file,
924 loff_t pos, unsigned len, 928 loff_t pos, unsigned len,
@@ -961,9 +965,11 @@ retry_locked:
961 snapc = ceph_get_snap_context((void *)page->private); 965 snapc = ceph_get_snap_context((void *)page->private);
962 unlock_page(page); 966 unlock_page(page);
963 ceph_queue_writeback(inode); 967 ceph_queue_writeback(inode);
964 wait_event_interruptible(ci->i_cap_wq, 968 r = wait_event_interruptible(ci->i_cap_wq,
965 context_is_writeable_or_written(inode, snapc)); 969 context_is_writeable_or_written(inode, snapc));
966 ceph_put_snap_context(snapc); 970 ceph_put_snap_context(snapc);
971 if (r == -ERESTARTSYS)
972 return r;
967 return -EAGAIN; 973 return -EAGAIN;
968 } 974 }
969 975
@@ -1035,7 +1041,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
1035 int r; 1041 int r;
1036 1042
1037 do { 1043 do {
1038 /* get a page*/ 1044 /* get a page */
1039 page = grab_cache_page_write_begin(mapping, index, 0); 1045 page = grab_cache_page_write_begin(mapping, index, 0);
1040 if (!page) 1046 if (!page)
1041 return -ENOMEM; 1047 return -ENOMEM;
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index f0318427b6da..8d8a84964763 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -28,6 +28,12 @@ static int ceph_x_is_authenticated(struct ceph_auth_client *ac)
28 return (ac->want_keys & xi->have_keys) == ac->want_keys; 28 return (ac->want_keys & xi->have_keys) == ac->want_keys;
29} 29}
30 30
31static int ceph_x_encrypt_buflen(int ilen)
32{
33 return sizeof(struct ceph_x_encrypt_header) + ilen + 16 +
34 sizeof(u32);
35}
36
31static int ceph_x_encrypt(struct ceph_crypto_key *secret, 37static int ceph_x_encrypt(struct ceph_crypto_key *secret,
32 void *ibuf, int ilen, void *obuf, size_t olen) 38 void *ibuf, int ilen, void *obuf, size_t olen)
33{ 39{
@@ -150,6 +156,11 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
150 struct timespec validity; 156 struct timespec validity;
151 struct ceph_crypto_key old_key; 157 struct ceph_crypto_key old_key;
152 void *tp, *tpend; 158 void *tp, *tpend;
159 struct ceph_timespec new_validity;
160 struct ceph_crypto_key new_session_key;
161 struct ceph_buffer *new_ticket_blob;
162 unsigned long new_expires, new_renew_after;
163 u64 new_secret_id;
153 164
154 ceph_decode_need(&p, end, sizeof(u32) + 1, bad); 165 ceph_decode_need(&p, end, sizeof(u32) + 1, bad);
155 166
@@ -182,16 +193,16 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
182 goto bad; 193 goto bad;
183 194
184 memcpy(&old_key, &th->session_key, sizeof(old_key)); 195 memcpy(&old_key, &th->session_key, sizeof(old_key));
185 ret = ceph_crypto_key_decode(&th->session_key, &dp, dend); 196 ret = ceph_crypto_key_decode(&new_session_key, &dp, dend);
186 if (ret) 197 if (ret)
187 goto out; 198 goto out;
188 199
189 ceph_decode_copy(&dp, &th->validity, sizeof(th->validity)); 200 ceph_decode_copy(&dp, &new_validity, sizeof(new_validity));
190 ceph_decode_timespec(&validity, &th->validity); 201 ceph_decode_timespec(&validity, &new_validity);
191 th->expires = get_seconds() + validity.tv_sec; 202 new_expires = get_seconds() + validity.tv_sec;
192 th->renew_after = th->expires - (validity.tv_sec / 4); 203 new_renew_after = new_expires - (validity.tv_sec / 4);
193 dout(" expires=%lu renew_after=%lu\n", th->expires, 204 dout(" expires=%lu renew_after=%lu\n", new_expires,
194 th->renew_after); 205 new_renew_after);
195 206
196 /* ticket blob for service */ 207 /* ticket blob for service */
197 ceph_decode_8_safe(&p, end, is_enc, bad); 208 ceph_decode_8_safe(&p, end, is_enc, bad);
@@ -216,10 +227,21 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
216 dout(" ticket blob is %d bytes\n", dlen); 227 dout(" ticket blob is %d bytes\n", dlen);
217 ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); 228 ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad);
218 struct_v = ceph_decode_8(&tp); 229 struct_v = ceph_decode_8(&tp);
219 th->secret_id = ceph_decode_64(&tp); 230 new_secret_id = ceph_decode_64(&tp);
220 ret = ceph_decode_buffer(&th->ticket_blob, &tp, tpend); 231 ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend);
221 if (ret) 232 if (ret)
222 goto out; 233 goto out;
234
235 /* all is well, update our ticket */
236 ceph_crypto_key_destroy(&th->session_key);
237 if (th->ticket_blob)
238 ceph_buffer_put(th->ticket_blob);
239 th->session_key = new_session_key;
240 th->ticket_blob = new_ticket_blob;
241 th->validity = new_validity;
242 th->secret_id = new_secret_id;
243 th->expires = new_expires;
244 th->renew_after = new_renew_after;
223 dout(" got ticket service %d (%s) secret_id %lld len %d\n", 245 dout(" got ticket service %d (%s) secret_id %lld len %d\n",
224 type, ceph_entity_type_name(type), th->secret_id, 246 type, ceph_entity_type_name(type), th->secret_id,
225 (int)th->ticket_blob->vec.iov_len); 247 (int)th->ticket_blob->vec.iov_len);
@@ -242,7 +264,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
242 struct ceph_x_ticket_handler *th, 264 struct ceph_x_ticket_handler *th,
243 struct ceph_x_authorizer *au) 265 struct ceph_x_authorizer *au)
244{ 266{
245 int len; 267 int maxlen;
246 struct ceph_x_authorize_a *msg_a; 268 struct ceph_x_authorize_a *msg_a;
247 struct ceph_x_authorize_b msg_b; 269 struct ceph_x_authorize_b msg_b;
248 void *p, *end; 270 void *p, *end;
@@ -253,15 +275,15 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
253 dout("build_authorizer for %s %p\n", 275 dout("build_authorizer for %s %p\n",
254 ceph_entity_type_name(th->service), au); 276 ceph_entity_type_name(th->service), au);
255 277
256 len = sizeof(*msg_a) + sizeof(msg_b) + sizeof(u32) + 278 maxlen = sizeof(*msg_a) + sizeof(msg_b) +
257 ticket_blob_len + 16; 279 ceph_x_encrypt_buflen(ticket_blob_len);
258 dout(" need len %d\n", len); 280 dout(" need len %d\n", maxlen);
259 if (au->buf && au->buf->alloc_len < len) { 281 if (au->buf && au->buf->alloc_len < maxlen) {
260 ceph_buffer_put(au->buf); 282 ceph_buffer_put(au->buf);
261 au->buf = NULL; 283 au->buf = NULL;
262 } 284 }
263 if (!au->buf) { 285 if (!au->buf) {
264 au->buf = ceph_buffer_new(len, GFP_NOFS); 286 au->buf = ceph_buffer_new(maxlen, GFP_NOFS);
265 if (!au->buf) 287 if (!au->buf)
266 return -ENOMEM; 288 return -ENOMEM;
267 } 289 }
@@ -296,6 +318,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
296 au->buf->vec.iov_len = p - au->buf->vec.iov_base; 318 au->buf->vec.iov_len = p - au->buf->vec.iov_base;
297 dout(" built authorizer nonce %llx len %d\n", au->nonce, 319 dout(" built authorizer nonce %llx len %d\n", au->nonce,
298 (int)au->buf->vec.iov_len); 320 (int)au->buf->vec.iov_len);
321 BUG_ON(au->buf->vec.iov_len > maxlen);
299 return 0; 322 return 0;
300 323
301out_buf: 324out_buf:
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index db122bb357b8..7d0a0d0adc18 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1407,6 +1407,7 @@ static int try_nonblocking_invalidate(struct inode *inode)
1407 */ 1407 */
1408void ceph_check_caps(struct ceph_inode_info *ci, int flags, 1408void ceph_check_caps(struct ceph_inode_info *ci, int flags,
1409 struct ceph_mds_session *session) 1409 struct ceph_mds_session *session)
1410 __releases(session->s_mutex)
1410{ 1411{
1411 struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); 1412 struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode);
1412 struct ceph_mds_client *mdsc = &client->mdsc; 1413 struct ceph_mds_client *mdsc = &client->mdsc;
@@ -1414,7 +1415,6 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
1414 struct ceph_cap *cap; 1415 struct ceph_cap *cap;
1415 int file_wanted, used; 1416 int file_wanted, used;
1416 int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */ 1417 int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */
1417 int drop_session_lock = session ? 0 : 1;
1418 int issued, implemented, want, retain, revoking, flushing = 0; 1418 int issued, implemented, want, retain, revoking, flushing = 0;
1419 int mds = -1; /* keep track of how far we've gone through i_caps list 1419 int mds = -1; /* keep track of how far we've gone through i_caps list
1420 to avoid an infinite loop on retry */ 1420 to avoid an infinite loop on retry */
@@ -1639,7 +1639,7 @@ ack:
1639 if (queue_invalidate) 1639 if (queue_invalidate)
1640 ceph_queue_invalidate(inode); 1640 ceph_queue_invalidate(inode);
1641 1641
1642 if (session && drop_session_lock) 1642 if (session)
1643 mutex_unlock(&session->s_mutex); 1643 mutex_unlock(&session->s_mutex);
1644 if (took_snap_rwsem) 1644 if (took_snap_rwsem)
1645 up_read(&mdsc->snap_rwsem); 1645 up_read(&mdsc->snap_rwsem);
@@ -2195,18 +2195,19 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2195 * Handle a cap GRANT message from the MDS. (Note that a GRANT may 2195 * Handle a cap GRANT message from the MDS. (Note that a GRANT may
2196 * actually be a revocation if it specifies a smaller cap set.) 2196 * actually be a revocation if it specifies a smaller cap set.)
2197 * 2197 *
2198 * caller holds s_mutex. 2198 * caller holds s_mutex and i_lock, we drop both.
2199 *
2199 * return value: 2200 * return value:
2200 * 0 - ok 2201 * 0 - ok
2201 * 1 - check_caps on auth cap only (writeback) 2202 * 1 - check_caps on auth cap only (writeback)
2202 * 2 - check_caps (ack revoke) 2203 * 2 - check_caps (ack revoke)
2203 */ 2204 */
2204static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, 2205static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2205 struct ceph_mds_session *session, 2206 struct ceph_mds_session *session,
2206 struct ceph_cap *cap, 2207 struct ceph_cap *cap,
2207 struct ceph_buffer *xattr_buf) 2208 struct ceph_buffer *xattr_buf)
2208 __releases(inode->i_lock) 2209 __releases(inode->i_lock)
2209 2210 __releases(session->s_mutex)
2210{ 2211{
2211 struct ceph_inode_info *ci = ceph_inode(inode); 2212 struct ceph_inode_info *ci = ceph_inode(inode);
2212 int mds = session->s_mds; 2213 int mds = session->s_mds;
@@ -2216,7 +2217,7 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2216 u64 size = le64_to_cpu(grant->size); 2217 u64 size = le64_to_cpu(grant->size);
2217 u64 max_size = le64_to_cpu(grant->max_size); 2218 u64 max_size = le64_to_cpu(grant->max_size);
2218 struct timespec mtime, atime, ctime; 2219 struct timespec mtime, atime, ctime;
2219 int reply = 0; 2220 int check_caps = 0;
2220 int wake = 0; 2221 int wake = 0;
2221 int writeback = 0; 2222 int writeback = 0;
2222 int revoked_rdcache = 0; 2223 int revoked_rdcache = 0;
@@ -2329,11 +2330,12 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2329 if ((used & ~newcaps) & CEPH_CAP_FILE_BUFFER) 2330 if ((used & ~newcaps) & CEPH_CAP_FILE_BUFFER)
2330 writeback = 1; /* will delay ack */ 2331 writeback = 1; /* will delay ack */
2331 else if (dirty & ~newcaps) 2332 else if (dirty & ~newcaps)
2332 reply = 1; /* initiate writeback in check_caps */ 2333 check_caps = 1; /* initiate writeback in check_caps */
2333 else if (((used & ~newcaps) & CEPH_CAP_FILE_CACHE) == 0 || 2334 else if (((used & ~newcaps) & CEPH_CAP_FILE_CACHE) == 0 ||
2334 revoked_rdcache) 2335 revoked_rdcache)
2335 reply = 2; /* send revoke ack in check_caps */ 2336 check_caps = 2; /* send revoke ack in check_caps */
2336 cap->issued = newcaps; 2337 cap->issued = newcaps;
2338 cap->implemented |= newcaps;
2337 } else if (cap->issued == newcaps) { 2339 } else if (cap->issued == newcaps) {
2338 dout("caps unchanged: %s -> %s\n", 2340 dout("caps unchanged: %s -> %s\n",
2339 ceph_cap_string(cap->issued), ceph_cap_string(newcaps)); 2341 ceph_cap_string(cap->issued), ceph_cap_string(newcaps));
@@ -2346,6 +2348,7 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2346 * pending revocation */ 2348 * pending revocation */
2347 wake = 1; 2349 wake = 1;
2348 } 2350 }
2351 BUG_ON(cap->issued & ~cap->implemented);
2349 2352
2350 spin_unlock(&inode->i_lock); 2353 spin_unlock(&inode->i_lock);
2351 if (writeback) 2354 if (writeback)
@@ -2359,7 +2362,14 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2359 ceph_queue_invalidate(inode); 2362 ceph_queue_invalidate(inode);
2360 if (wake) 2363 if (wake)
2361 wake_up(&ci->i_cap_wq); 2364 wake_up(&ci->i_cap_wq);
2362 return reply; 2365
2366 if (check_caps == 1)
2367 ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
2368 session);
2369 else if (check_caps == 2)
2370 ceph_check_caps(ci, CHECK_CAPS_NODELAY, session);
2371 else
2372 mutex_unlock(&session->s_mutex);
2363} 2373}
2364 2374
2365/* 2375/*
@@ -2548,9 +2558,8 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
2548 ci->i_cap_exporting_issued = cap->issued; 2558 ci->i_cap_exporting_issued = cap->issued;
2549 } 2559 }
2550 __ceph_remove_cap(cap); 2560 __ceph_remove_cap(cap);
2551 } else {
2552 WARN_ON(!cap);
2553 } 2561 }
2562 /* else, we already released it */
2554 2563
2555 spin_unlock(&inode->i_lock); 2564 spin_unlock(&inode->i_lock);
2556} 2565}
@@ -2621,9 +2630,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2621 u64 cap_id; 2630 u64 cap_id;
2622 u64 size, max_size; 2631 u64 size, max_size;
2623 u64 tid; 2632 u64 tid;
2624 int check_caps = 0;
2625 void *snaptrace; 2633 void *snaptrace;
2626 int r;
2627 2634
2628 dout("handle_caps from mds%d\n", mds); 2635 dout("handle_caps from mds%d\n", mds);
2629 2636
@@ -2668,8 +2675,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2668 case CEPH_CAP_OP_IMPORT: 2675 case CEPH_CAP_OP_IMPORT:
2669 handle_cap_import(mdsc, inode, h, session, 2676 handle_cap_import(mdsc, inode, h, session,
2670 snaptrace, le32_to_cpu(h->snap_trace_len)); 2677 snaptrace, le32_to_cpu(h->snap_trace_len));
2671 check_caps = 1; /* we may have sent a RELEASE to the old auth */ 2678 ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY,
2672 goto done; 2679 session);
2680 goto done_unlocked;
2673 } 2681 }
2674 2682
2675 /* the rest require a cap */ 2683 /* the rest require a cap */
@@ -2686,16 +2694,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2686 switch (op) { 2694 switch (op) {
2687 case CEPH_CAP_OP_REVOKE: 2695 case CEPH_CAP_OP_REVOKE:
2688 case CEPH_CAP_OP_GRANT: 2696 case CEPH_CAP_OP_GRANT:
2689 r = handle_cap_grant(inode, h, session, cap, msg->middle); 2697 handle_cap_grant(inode, h, session, cap, msg->middle);
2690 if (r == 1) 2698 goto done_unlocked;
2691 ceph_check_caps(ceph_inode(inode),
2692 CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
2693 session);
2694 else if (r == 2)
2695 ceph_check_caps(ceph_inode(inode),
2696 CHECK_CAPS_NODELAY,
2697 session);
2698 break;
2699 2699
2700 case CEPH_CAP_OP_FLUSH_ACK: 2700 case CEPH_CAP_OP_FLUSH_ACK:
2701 handle_cap_flush_ack(inode, tid, h, session, cap); 2701 handle_cap_flush_ack(inode, tid, h, session, cap);
@@ -2713,9 +2713,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2713 2713
2714done: 2714done:
2715 mutex_unlock(&session->s_mutex); 2715 mutex_unlock(&session->s_mutex);
2716 2716done_unlocked:
2717 if (check_caps)
2718 ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, NULL);
2719 if (inode) 2717 if (inode)
2720 iput(inode); 2718 iput(inode);
2721 return; 2719 return;
@@ -2838,11 +2836,18 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
2838 struct ceph_cap *cap; 2836 struct ceph_cap *cap;
2839 struct ceph_mds_request_release *rel = *p; 2837 struct ceph_mds_request_release *rel = *p;
2840 int ret = 0; 2838 int ret = 0;
2841 2839 int used = 0;
2842 dout("encode_inode_release %p mds%d drop %s unless %s\n", inode,
2843 mds, ceph_cap_string(drop), ceph_cap_string(unless));
2844 2840
2845 spin_lock(&inode->i_lock); 2841 spin_lock(&inode->i_lock);
2842 used = __ceph_caps_used(ci);
2843
2844 dout("encode_inode_release %p mds%d used %s drop %s unless %s\n", inode,
2845 mds, ceph_cap_string(used), ceph_cap_string(drop),
2846 ceph_cap_string(unless));
2847
2848 /* only drop unused caps */
2849 drop &= ~used;
2850
2846 cap = __get_cap_for_mds(ci, mds); 2851 cap = __get_cap_for_mds(ci, mds);
2847 if (cap && __cap_is_valid(cap)) { 2852 if (cap && __cap_is_valid(cap)) {
2848 if (force || 2853 if (force ||
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 5107384ee029..8a9116e15b70 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -288,8 +288,10 @@ more:
288 CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR; 288 CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;
289 289
290 /* discard old result, if any */ 290 /* discard old result, if any */
291 if (fi->last_readdir) 291 if (fi->last_readdir) {
292 ceph_mdsc_put_request(fi->last_readdir); 292 ceph_mdsc_put_request(fi->last_readdir);
293 fi->last_readdir = NULL;
294 }
293 295
294 /* requery frag tree, as the frag topology may have changed */ 296 /* requery frag tree, as the frag topology may have changed */
295 frag = ceph_choose_frag(ceph_inode(inode), frag, NULL, NULL); 297 frag = ceph_choose_frag(ceph_inode(inode), frag, NULL, NULL);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 7abe1aed819b..aca82d55cc53 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -378,6 +378,22 @@ void ceph_destroy_inode(struct inode *inode)
378 378
379 ceph_queue_caps_release(inode); 379 ceph_queue_caps_release(inode);
380 380
381 /*
382 * we may still have a snap_realm reference if there are stray
383 * caps in i_cap_exporting_issued or i_snap_caps.
384 */
385 if (ci->i_snap_realm) {
386 struct ceph_mds_client *mdsc =
387 &ceph_client(ci->vfs_inode.i_sb)->mdsc;
388 struct ceph_snap_realm *realm = ci->i_snap_realm;
389
390 dout(" dropping residual ref to snap realm %p\n", realm);
391 spin_lock(&realm->inodes_with_caps_lock);
392 list_del_init(&ci->i_snap_realm_item);
393 spin_unlock(&realm->inodes_with_caps_lock);
394 ceph_put_snap_realm(mdsc, realm);
395 }
396
381 kfree(ci->i_symlink); 397 kfree(ci->i_symlink);
382 while ((n = rb_first(&ci->i_fragtree)) != NULL) { 398 while ((n = rb_first(&ci->i_fragtree)) != NULL) {
383 frag = rb_entry(n, struct ceph_inode_frag, node); 399 frag = rb_entry(n, struct ceph_inode_frag, node);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a2600101ec22..5c7920be6420 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -328,6 +328,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
328 struct ceph_mds_session *s; 328 struct ceph_mds_session *s;
329 329
330 s = kzalloc(sizeof(*s), GFP_NOFS); 330 s = kzalloc(sizeof(*s), GFP_NOFS);
331 if (!s)
332 return ERR_PTR(-ENOMEM);
331 s->s_mdsc = mdsc; 333 s->s_mdsc = mdsc;
332 s->s_mds = mds; 334 s->s_mds = mds;
333 s->s_state = CEPH_MDS_SESSION_NEW; 335 s->s_state = CEPH_MDS_SESSION_NEW;
@@ -529,7 +531,7 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
529{ 531{
530 dout("__unregister_request %p tid %lld\n", req, req->r_tid); 532 dout("__unregister_request %p tid %lld\n", req, req->r_tid);
531 rb_erase(&req->r_node, &mdsc->request_tree); 533 rb_erase(&req->r_node, &mdsc->request_tree);
532 ceph_mdsc_put_request(req); 534 RB_CLEAR_NODE(&req->r_node);
533 535
534 if (req->r_unsafe_dir) { 536 if (req->r_unsafe_dir) {
535 struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir); 537 struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir);
@@ -538,6 +540,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
538 list_del_init(&req->r_unsafe_dir_item); 540 list_del_init(&req->r_unsafe_dir_item);
539 spin_unlock(&ci->i_unsafe_lock); 541 spin_unlock(&ci->i_unsafe_lock);
540 } 542 }
543
544 ceph_mdsc_put_request(req);
541} 545}
542 546
543/* 547/*
@@ -862,6 +866,7 @@ static int send_renew_caps(struct ceph_mds_client *mdsc,
862 if (time_after_eq(jiffies, session->s_cap_ttl) && 866 if (time_after_eq(jiffies, session->s_cap_ttl) &&
863 time_after_eq(session->s_cap_ttl, session->s_renew_requested)) 867 time_after_eq(session->s_cap_ttl, session->s_renew_requested))
864 pr_info("mds%d caps stale\n", session->s_mds); 868 pr_info("mds%d caps stale\n", session->s_mds);
869 session->s_renew_requested = jiffies;
865 870
866 /* do not try to renew caps until a recovering mds has reconnected 871 /* do not try to renew caps until a recovering mds has reconnected
867 * with its clients. */ 872 * with its clients. */
@@ -874,7 +879,6 @@ static int send_renew_caps(struct ceph_mds_client *mdsc,
874 879
875 dout("send_renew_caps to mds%d (%s)\n", session->s_mds, 880 dout("send_renew_caps to mds%d (%s)\n", session->s_mds,
876 ceph_mds_state_name(state)); 881 ceph_mds_state_name(state));
877 session->s_renew_requested = jiffies;
878 msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, 882 msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS,
879 ++session->s_renew_seq); 883 ++session->s_renew_seq);
880 if (IS_ERR(msg)) 884 if (IS_ERR(msg))
@@ -1566,8 +1570,13 @@ static int __do_request(struct ceph_mds_client *mdsc,
1566 1570
1567 /* get, open session */ 1571 /* get, open session */
1568 session = __ceph_lookup_mds_session(mdsc, mds); 1572 session = __ceph_lookup_mds_session(mdsc, mds);
1569 if (!session) 1573 if (!session) {
1570 session = register_session(mdsc, mds); 1574 session = register_session(mdsc, mds);
1575 if (IS_ERR(session)) {
1576 err = PTR_ERR(session);
1577 goto finish;
1578 }
1579 }
1571 dout("do_request mds%d session %p state %s\n", mds, session, 1580 dout("do_request mds%d session %p state %s\n", mds, session,
1572 session_state_name(session->s_state)); 1581 session_state_name(session->s_state));
1573 if (session->s_state != CEPH_MDS_SESSION_OPEN && 1582 if (session->s_state != CEPH_MDS_SESSION_OPEN &&
@@ -1770,7 +1779,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
1770 dout("handle_reply %p\n", req); 1779 dout("handle_reply %p\n", req);
1771 1780
1772 /* correct session? */ 1781 /* correct session? */
1773 if (!req->r_session && req->r_session != session) { 1782 if (req->r_session != session) {
1774 pr_err("mdsc_handle_reply got %llu on session mds%d" 1783 pr_err("mdsc_handle_reply got %llu on session mds%d"
1775 " not mds%d\n", tid, session->s_mds, 1784 " not mds%d\n", tid, session->s_mds,
1776 req->r_session ? req->r_session->s_mds : -1); 1785 req->r_session ? req->r_session->s_mds : -1);
@@ -2682,29 +2691,41 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
2682 */ 2691 */
2683static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid) 2692static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid)
2684{ 2693{
2685 struct ceph_mds_request *req = NULL; 2694 struct ceph_mds_request *req = NULL, *nextreq;
2686 struct rb_node *n; 2695 struct rb_node *n;
2687 2696
2688 mutex_lock(&mdsc->mutex); 2697 mutex_lock(&mdsc->mutex);
2689 dout("wait_unsafe_requests want %lld\n", want_tid); 2698 dout("wait_unsafe_requests want %lld\n", want_tid);
2699restart:
2690 req = __get_oldest_req(mdsc); 2700 req = __get_oldest_req(mdsc);
2691 while (req && req->r_tid <= want_tid) { 2701 while (req && req->r_tid <= want_tid) {
2702 /* find next request */
2703 n = rb_next(&req->r_node);
2704 if (n)
2705 nextreq = rb_entry(n, struct ceph_mds_request, r_node);
2706 else
2707 nextreq = NULL;
2692 if ((req->r_op & CEPH_MDS_OP_WRITE)) { 2708 if ((req->r_op & CEPH_MDS_OP_WRITE)) {
2693 /* write op */ 2709 /* write op */
2694 ceph_mdsc_get_request(req); 2710 ceph_mdsc_get_request(req);
2711 if (nextreq)
2712 ceph_mdsc_get_request(nextreq);
2695 mutex_unlock(&mdsc->mutex); 2713 mutex_unlock(&mdsc->mutex);
2696 dout("wait_unsafe_requests wait on %llu (want %llu)\n", 2714 dout("wait_unsafe_requests wait on %llu (want %llu)\n",
2697 req->r_tid, want_tid); 2715 req->r_tid, want_tid);
2698 wait_for_completion(&req->r_safe_completion); 2716 wait_for_completion(&req->r_safe_completion);
2699 mutex_lock(&mdsc->mutex); 2717 mutex_lock(&mdsc->mutex);
2700 n = rb_next(&req->r_node);
2701 ceph_mdsc_put_request(req); 2718 ceph_mdsc_put_request(req);
2702 } else { 2719 if (!nextreq)
2703 n = rb_next(&req->r_node); 2720 break; /* next dne before, so we're done! */
2721 if (RB_EMPTY_NODE(&nextreq->r_node)) {
2722 /* next request was removed from tree */
2723 ceph_mdsc_put_request(nextreq);
2724 goto restart;
2725 }
2726 ceph_mdsc_put_request(nextreq); /* won't go away */
2704 } 2727 }
2705 if (!n) 2728 req = nextreq;
2706 break;
2707 req = rb_entry(n, struct ceph_mds_request, r_node);
2708 } 2729 }
2709 mutex_unlock(&mdsc->mutex); 2730 mutex_unlock(&mdsc->mutex);
2710 dout("wait_unsafe_requests done\n"); 2731 dout("wait_unsafe_requests done\n");
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 781656a49bf8..a32f0f896d9f 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -366,6 +366,14 @@ void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr)
366} 366}
367 367
368/* 368/*
369 * return true if this connection ever successfully opened
370 */
371bool ceph_con_opened(struct ceph_connection *con)
372{
373 return con->connect_seq > 0;
374}
375
376/*
369 * generic get/put 377 * generic get/put
370 */ 378 */
371struct ceph_connection *ceph_con_get(struct ceph_connection *con) 379struct ceph_connection *ceph_con_get(struct ceph_connection *con)
@@ -830,13 +838,6 @@ static void prepare_read_connect(struct ceph_connection *con)
830 con->in_base_pos = 0; 838 con->in_base_pos = 0;
831} 839}
832 840
833static void prepare_read_connect_retry(struct ceph_connection *con)
834{
835 dout("prepare_read_connect_retry %p\n", con);
836 con->in_base_pos = strlen(CEPH_BANNER) + sizeof(con->actual_peer_addr)
837 + sizeof(con->peer_addr_for_me);
838}
839
840static void prepare_read_ack(struct ceph_connection *con) 841static void prepare_read_ack(struct ceph_connection *con)
841{ 842{
842 dout("prepare_read_ack %p\n", con); 843 dout("prepare_read_ack %p\n", con);
@@ -1146,7 +1147,7 @@ static int process_connect(struct ceph_connection *con)
1146 } 1147 }
1147 con->auth_retry = 1; 1148 con->auth_retry = 1;
1148 prepare_write_connect(con->msgr, con, 0); 1149 prepare_write_connect(con->msgr, con, 0);
1149 prepare_read_connect_retry(con); 1150 prepare_read_connect(con);
1150 break; 1151 break;
1151 1152
1152 case CEPH_MSGR_TAG_RESETSESSION: 1153 case CEPH_MSGR_TAG_RESETSESSION:
@@ -1843,8 +1844,6 @@ static void ceph_fault(struct ceph_connection *con)
1843 goto out; 1844 goto out;
1844 } 1845 }
1845 1846
1846 clear_bit(BUSY, &con->state); /* to avoid an improbable race */
1847
1848 mutex_lock(&con->mutex); 1847 mutex_lock(&con->mutex);
1849 if (test_bit(CLOSED, &con->state)) 1848 if (test_bit(CLOSED, &con->state))
1850 goto out_unlock; 1849 goto out_unlock;
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h
index 4caaa5911110..a343dae73cdc 100644
--- a/fs/ceph/messenger.h
+++ b/fs/ceph/messenger.h
@@ -223,6 +223,7 @@ extern void ceph_con_init(struct ceph_messenger *msgr,
223 struct ceph_connection *con); 223 struct ceph_connection *con);
224extern void ceph_con_open(struct ceph_connection *con, 224extern void ceph_con_open(struct ceph_connection *con,
225 struct ceph_entity_addr *addr); 225 struct ceph_entity_addr *addr);
226extern bool ceph_con_opened(struct ceph_connection *con);
226extern void ceph_con_close(struct ceph_connection *con); 227extern void ceph_con_close(struct ceph_connection *con);
227extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg); 228extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg);
228extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg); 229extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg);
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index dbe63db9762f..c7b4dedaace6 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -413,11 +413,22 @@ static void remove_old_osds(struct ceph_osd_client *osdc, int remove_all)
413 */ 413 */
414static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) 414static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
415{ 415{
416 struct ceph_osd_request *req;
416 int ret = 0; 417 int ret = 0;
417 418
418 dout("__reset_osd %p osd%d\n", osd, osd->o_osd); 419 dout("__reset_osd %p osd%d\n", osd, osd->o_osd);
419 if (list_empty(&osd->o_requests)) { 420 if (list_empty(&osd->o_requests)) {
420 __remove_osd(osdc, osd); 421 __remove_osd(osdc, osd);
422 } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd],
423 &osd->o_con.peer_addr,
424 sizeof(osd->o_con.peer_addr)) == 0 &&
425 !ceph_con_opened(&osd->o_con)) {
426 dout(" osd addr hasn't changed and connection never opened,"
427 " letting msgr retry");
428 /* touch each r_stamp for handle_timeout()'s benfit */
429 list_for_each_entry(req, &osd->o_requests, r_osd_item)
430 req->r_stamp = jiffies;
431 ret = -EAGAIN;
421 } else { 432 } else {
422 ceph_con_close(&osd->o_con); 433 ceph_con_close(&osd->o_con);
423 ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]); 434 ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]);
@@ -633,7 +644,7 @@ static int __send_request(struct ceph_osd_client *osdc,
633 reqhead->flags |= cpu_to_le32(req->r_flags); /* e.g., RETRY */ 644 reqhead->flags |= cpu_to_le32(req->r_flags); /* e.g., RETRY */
634 reqhead->reassert_version = req->r_reassert_version; 645 reqhead->reassert_version = req->r_reassert_version;
635 646
636 req->r_sent_stamp = jiffies; 647 req->r_stamp = jiffies;
637 list_move_tail(&osdc->req_lru, &req->r_req_lru_item); 648 list_move_tail(&osdc->req_lru, &req->r_req_lru_item);
638 649
639 ceph_msg_get(req->r_request); /* send consumes a ref */ 650 ceph_msg_get(req->r_request); /* send consumes a ref */
@@ -660,7 +671,7 @@ static void handle_timeout(struct work_struct *work)
660 unsigned long timeout = osdc->client->mount_args->osd_timeout * HZ; 671 unsigned long timeout = osdc->client->mount_args->osd_timeout * HZ;
661 unsigned long keepalive = 672 unsigned long keepalive =
662 osdc->client->mount_args->osd_keepalive_timeout * HZ; 673 osdc->client->mount_args->osd_keepalive_timeout * HZ;
663 unsigned long last_sent = 0; 674 unsigned long last_stamp = 0;
664 struct rb_node *p; 675 struct rb_node *p;
665 struct list_head slow_osds; 676 struct list_head slow_osds;
666 677
@@ -697,12 +708,12 @@ static void handle_timeout(struct work_struct *work)
697 req = list_entry(osdc->req_lru.next, struct ceph_osd_request, 708 req = list_entry(osdc->req_lru.next, struct ceph_osd_request,
698 r_req_lru_item); 709 r_req_lru_item);
699 710
700 if (time_before(jiffies, req->r_sent_stamp + timeout)) 711 if (time_before(jiffies, req->r_stamp + timeout))
701 break; 712 break;
702 713
703 BUG_ON(req == last_req && req->r_sent_stamp == last_sent); 714 BUG_ON(req == last_req && req->r_stamp == last_stamp);
704 last_req = req; 715 last_req = req;
705 last_sent = req->r_sent_stamp; 716 last_stamp = req->r_stamp;
706 717
707 osd = req->r_osd; 718 osd = req->r_osd;
708 BUG_ON(!osd); 719 BUG_ON(!osd);
@@ -718,7 +729,7 @@ static void handle_timeout(struct work_struct *work)
718 */ 729 */
719 INIT_LIST_HEAD(&slow_osds); 730 INIT_LIST_HEAD(&slow_osds);
720 list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) { 731 list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) {
721 if (time_before(jiffies, req->r_sent_stamp + keepalive)) 732 if (time_before(jiffies, req->r_stamp + keepalive))
722 break; 733 break;
723 734
724 osd = req->r_osd; 735 osd = req->r_osd;
@@ -862,7 +873,9 @@ static int __kick_requests(struct ceph_osd_client *osdc,
862 873
863 dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1); 874 dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1);
864 if (kickosd) { 875 if (kickosd) {
865 __reset_osd(osdc, kickosd); 876 err = __reset_osd(osdc, kickosd);
877 if (err == -EAGAIN)
878 return 1;
866 } else { 879 } else {
867 for (p = rb_first(&osdc->osds); p; p = n) { 880 for (p = rb_first(&osdc->osds); p; p = n) {
868 struct ceph_osd *osd = 881 struct ceph_osd *osd =
@@ -913,7 +926,7 @@ static int __kick_requests(struct ceph_osd_client *osdc,
913 926
914kick: 927kick:
915 dout("kicking %p tid %llu osd%d\n", req, req->r_tid, 928 dout("kicking %p tid %llu osd%d\n", req, req->r_tid,
916 req->r_osd->o_osd); 929 req->r_osd ? req->r_osd->o_osd : -1);
917 req->r_flags |= CEPH_OSD_FLAG_RETRY; 930 req->r_flags |= CEPH_OSD_FLAG_RETRY;
918 err = __send_request(osdc, req); 931 err = __send_request(osdc, req);
919 if (err) { 932 if (err) {
diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h
index 1b1a3ca43afc..b0759911e7c3 100644
--- a/fs/ceph/osd_client.h
+++ b/fs/ceph/osd_client.h
@@ -70,7 +70,7 @@ struct ceph_osd_request {
70 70
71 char r_oid[40]; /* object name */ 71 char r_oid[40]; /* object name */
72 int r_oid_len; 72 int r_oid_len;
73 unsigned long r_sent_stamp; 73 unsigned long r_stamp; /* send OR check time */
74 bool r_resend; /* msg send failed, needs retry */ 74 bool r_resend; /* msg send failed, needs retry */
75 75
76 struct ceph_file_layout r_file_layout; 76 struct ceph_file_layout r_file_layout;
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index b83f2692b835..d82fe87c2a6e 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -480,6 +480,14 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id)
480 return NULL; 480 return NULL;
481} 481}
482 482
483void __decode_pool(void **p, struct ceph_pg_pool_info *pi)
484{
485 ceph_decode_copy(p, &pi->v, sizeof(pi->v));
486 calc_pg_masks(pi);
487 *p += le32_to_cpu(pi->v.num_snaps) * sizeof(u64);
488 *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2;
489}
490
483/* 491/*
484 * decode a full map. 492 * decode a full map.
485 */ 493 */
@@ -526,12 +534,8 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
526 ev, CEPH_PG_POOL_VERSION); 534 ev, CEPH_PG_POOL_VERSION);
527 goto bad; 535 goto bad;
528 } 536 }
529 ceph_decode_copy(p, &pi->v, sizeof(pi->v)); 537 __decode_pool(p, pi);
530 __insert_pg_pool(&map->pg_pools, pi); 538 __insert_pg_pool(&map->pg_pools, pi);
531 calc_pg_masks(pi);
532 *p += le32_to_cpu(pi->v.num_snaps) * sizeof(u64);
533 *p += le32_to_cpu(pi->v.num_removed_snap_intervals)
534 * sizeof(u64) * 2;
535 } 539 }
536 ceph_decode_32_safe(p, end, map->pool_max, bad); 540 ceph_decode_32_safe(p, end, map->pool_max, bad);
537 541
@@ -714,8 +718,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
714 pi->id = pool; 718 pi->id = pool;
715 __insert_pg_pool(&map->pg_pools, pi); 719 __insert_pg_pool(&map->pg_pools, pi);
716 } 720 }
717 ceph_decode_copy(p, &pi->v, sizeof(pi->v)); 721 __decode_pool(p, pi);
718 calc_pg_masks(pi);
719 } 722 }
720 723
721 /* old_pool */ 724 /* old_pool */
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index bf2a5f3846a4..df04e210a055 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -314,9 +314,9 @@ static int build_snap_context(struct ceph_snap_realm *realm)
314 because rebuild_snap_realms() works _downward_ in 314 because rebuild_snap_realms() works _downward_ in
315 hierarchy after each update.) */ 315 hierarchy after each update.) */
316 if (realm->cached_context && 316 if (realm->cached_context &&
317 realm->cached_context->seq <= realm->seq && 317 realm->cached_context->seq == realm->seq &&
318 (!parent || 318 (!parent ||
319 realm->cached_context->seq <= parent->cached_context->seq)) { 319 realm->cached_context->seq >= parent->cached_context->seq)) {
320 dout("build_snap_context %llx %p: %p seq %lld (%d snaps)" 320 dout("build_snap_context %llx %p: %p seq %lld (%d snaps)"
321 " (unchanged)\n", 321 " (unchanged)\n",
322 realm->ino, realm, realm->cached_context, 322 realm->ino, realm, realm->cached_context,
@@ -818,7 +818,9 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
818 * queued (again) by ceph_update_snap_trace() 818 * queued (again) by ceph_update_snap_trace()
819 * below. Queue it _now_, under the old context. 819 * below. Queue it _now_, under the old context.
820 */ 820 */
821 spin_lock(&realm->inodes_with_caps_lock);
821 list_del_init(&ci->i_snap_realm_item); 822 list_del_init(&ci->i_snap_realm_item);
823 spin_unlock(&realm->inodes_with_caps_lock);
822 spin_unlock(&inode->i_lock); 824 spin_unlock(&inode->i_lock);
823 825
824 ceph_queue_cap_snap(ci, 826 ceph_queue_cap_snap(ci,
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index ef9008b885b5..0d0e97ed3ff6 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -582,7 +582,9 @@ got:
582 inode->i_generation = sbi->s_next_generation++; 582 inode->i_generation = sbi->s_next_generation++;
583 spin_unlock(&sbi->s_next_gen_lock); 583 spin_unlock(&sbi->s_next_gen_lock);
584 584
585 ei->i_state = EXT3_STATE_NEW; 585 ei->i_state_flags = 0;
586 ext3_set_inode_state(inode, EXT3_STATE_NEW);
587
586 ei->i_extra_isize = 588 ei->i_extra_isize =
587 (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ? 589 (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ?
588 sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0; 590 sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 7f920b7263a4..ea33bdf0a300 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2811,7 +2811,7 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
2811 inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime); 2811 inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime);
2812 inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0; 2812 inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0;
2813 2813
2814 ei->i_state = 0; 2814 ei->i_state_flags = 0;
2815 ei->i_dir_start_lookup = 0; 2815 ei->i_dir_start_lookup = 0;
2816 ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); 2816 ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
2817 /* We now have enough fields to check if the inode was active or not. 2817 /* We now have enough fields to check if the inode was active or not.
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index c1ef50154868..6fcc7e71fbaa 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -309,7 +309,7 @@ static int vfat_create_shortname(struct inode *dir, struct nls_table *nls,
309{ 309{
310 struct fat_mount_options *opts = &MSDOS_SB(dir->i_sb)->options; 310 struct fat_mount_options *opts = &MSDOS_SB(dir->i_sb)->options;
311 wchar_t *ip, *ext_start, *end, *name_start; 311 wchar_t *ip, *ext_start, *end, *name_start;
312 unsigned char base[9], ext[4], buf[8], *p; 312 unsigned char base[9], ext[4], buf[5], *p;
313 unsigned char charbuf[NLS_MAX_CHARSET_SIZE]; 313 unsigned char charbuf[NLS_MAX_CHARSET_SIZE];
314 int chl, chi; 314 int chl, chi;
315 int sz = 0, extlen, baselen, i, numtail_baselen, numtail2_baselen; 315 int sz = 0, extlen, baselen, i, numtail_baselen, numtail2_baselen;
@@ -467,7 +467,7 @@ static int vfat_create_shortname(struct inode *dir, struct nls_table *nls,
467 return 0; 467 return 0;
468 } 468 }
469 469
470 i = jiffies & 0xffff; 470 i = jiffies;
471 sz = (jiffies >> 16) & 0x7; 471 sz = (jiffies >> 16) & 0x7;
472 if (baselen > 2) { 472 if (baselen > 2) {
473 baselen = numtail2_baselen; 473 baselen = numtail2_baselen;
@@ -476,7 +476,7 @@ static int vfat_create_shortname(struct inode *dir, struct nls_table *nls,
476 name_res[baselen + 4] = '~'; 476 name_res[baselen + 4] = '~';
477 name_res[baselen + 5] = '1' + sz; 477 name_res[baselen + 5] = '1' + sz;
478 while (1) { 478 while (1) {
479 sprintf(buf, "%04X", i); 479 snprintf(buf, sizeof(buf), "%04X", i & 0xffff);
480 memcpy(&name_res[baselen], buf, 4); 480 memcpy(&name_res[baselen], buf, 4);
481 if (vfat_find_form(dir, name_res) < 0) 481 if (vfat_find_form(dir, name_res) < 0)
482 break; 482 break;
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index e513ac599c8e..0b589a9b4ffc 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -53,7 +53,7 @@ const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = {
53static void fscache_object_slow_work_put_ref(struct slow_work *); 53static void fscache_object_slow_work_put_ref(struct slow_work *);
54static int fscache_object_slow_work_get_ref(struct slow_work *); 54static int fscache_object_slow_work_get_ref(struct slow_work *);
55static void fscache_object_slow_work_execute(struct slow_work *); 55static void fscache_object_slow_work_execute(struct slow_work *);
56#ifdef CONFIG_SLOW_WORK_PROC 56#ifdef CONFIG_SLOW_WORK_DEBUG
57static void fscache_object_slow_work_desc(struct slow_work *, struct seq_file *); 57static void fscache_object_slow_work_desc(struct slow_work *, struct seq_file *);
58#endif 58#endif
59static void fscache_initialise_object(struct fscache_object *); 59static void fscache_initialise_object(struct fscache_object *);
@@ -69,7 +69,7 @@ const struct slow_work_ops fscache_object_slow_work_ops = {
69 .get_ref = fscache_object_slow_work_get_ref, 69 .get_ref = fscache_object_slow_work_get_ref,
70 .put_ref = fscache_object_slow_work_put_ref, 70 .put_ref = fscache_object_slow_work_put_ref,
71 .execute = fscache_object_slow_work_execute, 71 .execute = fscache_object_slow_work_execute,
72#ifdef CONFIG_SLOW_WORK_PROC 72#ifdef CONFIG_SLOW_WORK_DEBUG
73 .desc = fscache_object_slow_work_desc, 73 .desc = fscache_object_slow_work_desc,
74#endif 74#endif
75}; 75};
@@ -364,7 +364,7 @@ static void fscache_object_slow_work_execute(struct slow_work *work)
364/* 364/*
365 * describe an object for slow-work debugging 365 * describe an object for slow-work debugging
366 */ 366 */
367#ifdef CONFIG_SLOW_WORK_PROC 367#ifdef CONFIG_SLOW_WORK_DEBUG
368static void fscache_object_slow_work_desc(struct slow_work *work, 368static void fscache_object_slow_work_desc(struct slow_work *work,
369 struct seq_file *m) 369 struct seq_file *m)
370{ 370{
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index 313e79a14266..9f6c928d4586 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -500,7 +500,7 @@ static void fscache_op_execute(struct slow_work *work)
500/* 500/*
501 * describe an operation for slow-work debugging 501 * describe an operation for slow-work debugging
502 */ 502 */
503#ifdef CONFIG_SLOW_WORK_PROC 503#ifdef CONFIG_SLOW_WORK_DEBUG
504static void fscache_op_desc(struct slow_work *work, struct seq_file *m) 504static void fscache_op_desc(struct slow_work *work, struct seq_file *m)
505{ 505{
506 struct fscache_operation *op = 506 struct fscache_operation *op =
@@ -517,7 +517,7 @@ const struct slow_work_ops fscache_op_slow_work_ops = {
517 .get_ref = fscache_op_get_ref, 517 .get_ref = fscache_op_get_ref,
518 .put_ref = fscache_op_put_ref, 518 .put_ref = fscache_op_put_ref,
519 .execute = fscache_op_execute, 519 .execute = fscache_op_execute,
520#ifdef CONFIG_SLOW_WORK_PROC 520#ifdef CONFIG_SLOW_WORK_DEBUG
521 .desc = fscache_op_desc, 521 .desc = fscache_op_desc,
522#endif 522#endif
523}; 523};
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 9718c22f186d..a5d0c56d3ebc 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -80,6 +80,7 @@ static void writeseg_end_io(struct bio *bio, int err)
80 prefetchw(&bvec->bv_page->flags); 80 prefetchw(&bvec->bv_page->flags);
81 81
82 end_page_writeback(page); 82 end_page_writeback(page);
83 page_cache_release(page);
83 } while (bvec >= bio->bi_io_vec); 84 } while (bvec >= bio->bi_io_vec);
84 bio_put(bio); 85 bio_put(bio);
85 if (atomic_dec_and_test(&super->s_pending_writes)) 86 if (atomic_dec_and_test(&super->s_pending_writes))
@@ -97,8 +98,10 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
97 unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9); 98 unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9);
98 int i; 99 int i;
99 100
101 if (max_pages > BIO_MAX_PAGES)
102 max_pages = BIO_MAX_PAGES;
100 bio = bio_alloc(GFP_NOFS, max_pages); 103 bio = bio_alloc(GFP_NOFS, max_pages);
101 BUG_ON(!bio); /* FIXME: handle this */ 104 BUG_ON(!bio);
102 105
103 for (i = 0; i < nr_pages; i++) { 106 for (i = 0; i < nr_pages; i++) {
104 if (i >= max_pages) { 107 if (i >= max_pages) {
@@ -191,8 +194,10 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
191 unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9); 194 unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9);
192 int i; 195 int i;
193 196
197 if (max_pages > BIO_MAX_PAGES)
198 max_pages = BIO_MAX_PAGES;
194 bio = bio_alloc(GFP_NOFS, max_pages); 199 bio = bio_alloc(GFP_NOFS, max_pages);
195 BUG_ON(!bio); /* FIXME: handle this */ 200 BUG_ON(!bio);
196 201
197 for (i = 0; i < nr_pages; i++) { 202 for (i = 0; i < nr_pages; i++) {
198 if (i >= max_pages) { 203 if (i >= max_pages) {
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 56a8bfbb0120..c76b4b5c7ff6 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -303,12 +303,12 @@ static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir)
303 (filler_t *)logfs_readpage, NULL); 303 (filler_t *)logfs_readpage, NULL);
304 if (IS_ERR(page)) 304 if (IS_ERR(page))
305 return PTR_ERR(page); 305 return PTR_ERR(page);
306 dd = kmap_atomic(page, KM_USER0); 306 dd = kmap(page);
307 BUG_ON(dd->namelen == 0); 307 BUG_ON(dd->namelen == 0);
308 308
309 full = filldir(buf, (char *)dd->name, be16_to_cpu(dd->namelen), 309 full = filldir(buf, (char *)dd->name, be16_to_cpu(dd->namelen),
310 pos, be64_to_cpu(dd->ino), dd->type); 310 pos, be64_to_cpu(dd->ino), dd->type);
311 kunmap_atomic(dd, KM_USER0); 311 kunmap(page);
312 page_cache_release(page); 312 page_cache_release(page);
313 if (full) 313 if (full)
314 break; 314 break;
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index 6ad30a4c9052..d57c7b07b60b 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -800,6 +800,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb)
800{ 800{
801 struct logfs_super *super = logfs_super(sb); 801 struct logfs_super *super = logfs_super(sb);
802 struct logfs_area *area = super->s_journal_area; 802 struct logfs_area *area = super->s_journal_area;
803 struct btree_head32 *head = &super->s_reserved_segments;
803 u32 segno, ec; 804 u32 segno, ec;
804 int i, err; 805 int i, err;
805 806
@@ -807,6 +808,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb)
807 /* Drop old segments */ 808 /* Drop old segments */
808 journal_for_each(i) 809 journal_for_each(i)
809 if (super->s_journal_seg[i]) { 810 if (super->s_journal_seg[i]) {
811 btree_remove32(head, super->s_journal_seg[i]);
810 logfs_set_segment_unreserved(sb, 812 logfs_set_segment_unreserved(sb,
811 super->s_journal_seg[i], 813 super->s_journal_seg[i],
812 super->s_journal_ec[i]); 814 super->s_journal_ec[i]);
@@ -819,8 +821,13 @@ void do_logfs_journal_wl_pass(struct super_block *sb)
819 super->s_journal_seg[i] = segno; 821 super->s_journal_seg[i] = segno;
820 super->s_journal_ec[i] = ec; 822 super->s_journal_ec[i] = ec;
821 logfs_set_segment_reserved(sb, segno); 823 logfs_set_segment_reserved(sb, segno);
824 err = btree_insert32(head, segno, (void *)1, GFP_KERNEL);
825 BUG_ON(err); /* mempool should prevent this */
826 err = logfs_erase_segment(sb, segno, 1);
827 BUG_ON(err); /* FIXME: remount-ro would be nicer */
822 } 828 }
823 /* Manually move journal_area */ 829 /* Manually move journal_area */
830 freeseg(sb, area->a_segno);
824 area->a_segno = super->s_journal_seg[0]; 831 area->a_segno = super->s_journal_seg[0];
825 area->a_is_open = 0; 832 area->a_is_open = 0;
826 area->a_used_bytes = 0; 833 area->a_used_bytes = 0;
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index 129779431373..b84b0eec6024 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -587,6 +587,7 @@ void move_page_to_btree(struct page *page);
587int logfs_init_mapping(struct super_block *sb); 587int logfs_init_mapping(struct super_block *sb);
588void logfs_sync_area(struct logfs_area *area); 588void logfs_sync_area(struct logfs_area *area);
589void logfs_sync_segments(struct super_block *sb); 589void logfs_sync_segments(struct super_block *sb);
590void freeseg(struct super_block *sb, u32 segno);
590 591
591/* area handling */ 592/* area handling */
592int logfs_init_areas(struct super_block *sb); 593int logfs_init_areas(struct super_block *sb);
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 7a23b3e7c0a7..c3a3a6814b84 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1594,7 +1594,6 @@ int logfs_delete(struct inode *inode, pgoff_t index,
1594 return ret; 1594 return ret;
1595} 1595}
1596 1596
1597/* Rewrite cannot mark the inode dirty but has to write it immediatly. */
1598int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs, 1597int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs,
1599 gc_level_t gc_level, long flags) 1598 gc_level_t gc_level, long flags)
1600{ 1599{
@@ -1611,6 +1610,18 @@ int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs,
1611 if (level != 0) 1610 if (level != 0)
1612 alloc_indirect_block(inode, page, 0); 1611 alloc_indirect_block(inode, page, 0);
1613 err = logfs_write_buf(inode, page, flags); 1612 err = logfs_write_buf(inode, page, flags);
1613 if (!err && shrink_level(gc_level) == 0) {
1614 /* Rewrite cannot mark the inode dirty but has to
1615 * write it immediately.
1616 * Q: Can't we just create an alias for the inode
1617 * instead? And if not, why not?
1618 */
1619 if (inode->i_ino == LOGFS_INO_MASTER)
1620 logfs_write_anchor(inode->i_sb);
1621 else {
1622 err = __logfs_write_inode(inode, flags);
1623 }
1624 }
1614 } 1625 }
1615 logfs_put_write_page(page); 1626 logfs_put_write_page(page);
1616 return err; 1627 return err;
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index 1a14f9910d55..0ecd8f07c11e 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -93,50 +93,58 @@ void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
93 } while (len); 93 } while (len);
94} 94}
95 95
96/* 96static void pad_partial_page(struct logfs_area *area)
97 * bdev_writeseg will write full pages. Memset the tail to prevent data leaks.
98 */
99static void pad_wbuf(struct logfs_area *area, int final)
100{ 97{
101 struct super_block *sb = area->a_sb; 98 struct super_block *sb = area->a_sb;
102 struct logfs_super *super = logfs_super(sb);
103 struct page *page; 99 struct page *page;
104 u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes); 100 u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
105 pgoff_t index = ofs >> PAGE_SHIFT; 101 pgoff_t index = ofs >> PAGE_SHIFT;
106 long offset = ofs & (PAGE_SIZE-1); 102 long offset = ofs & (PAGE_SIZE-1);
107 u32 len = PAGE_SIZE - offset; 103 u32 len = PAGE_SIZE - offset;
108 104
109 if (len == PAGE_SIZE) { 105 if (len % PAGE_SIZE) {
110 /* The math in this function can surely use some love */ 106 page = get_mapping_page(sb, index, 0);
111 len = 0;
112 }
113 if (len) {
114 BUG_ON(area->a_used_bytes >= super->s_segsize);
115
116 page = get_mapping_page(area->a_sb, index, 0);
117 BUG_ON(!page); /* FIXME: reserve a pool */ 107 BUG_ON(!page); /* FIXME: reserve a pool */
118 memset(page_address(page) + offset, 0xff, len); 108 memset(page_address(page) + offset, 0xff, len);
119 SetPagePrivate(page); 109 SetPagePrivate(page);
120 page_cache_release(page); 110 page_cache_release(page);
121 } 111 }
112}
122 113
123 if (!final) 114static void pad_full_pages(struct logfs_area *area)
124 return; 115{
116 struct super_block *sb = area->a_sb;
117 struct logfs_super *super = logfs_super(sb);
118 u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
119 u32 len = super->s_segsize - area->a_used_bytes;
120 pgoff_t index = PAGE_CACHE_ALIGN(ofs) >> PAGE_CACHE_SHIFT;
121 pgoff_t no_indizes = len >> PAGE_CACHE_SHIFT;
122 struct page *page;
125 123
126 area->a_used_bytes += len; 124 while (no_indizes) {
127 for ( ; area->a_used_bytes < super->s_segsize; 125 page = get_mapping_page(sb, index, 0);
128 area->a_used_bytes += PAGE_SIZE) {
129 /* Memset another page */
130 index++;
131 page = get_mapping_page(area->a_sb, index, 0);
132 BUG_ON(!page); /* FIXME: reserve a pool */ 126 BUG_ON(!page); /* FIXME: reserve a pool */
133 memset(page_address(page), 0xff, PAGE_SIZE); 127 SetPageUptodate(page);
128 memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
134 SetPagePrivate(page); 129 SetPagePrivate(page);
135 page_cache_release(page); 130 page_cache_release(page);
131 index++;
132 no_indizes--;
136 } 133 }
137} 134}
138 135
139/* 136/*
137 * bdev_writeseg will write full pages. Memset the tail to prevent data leaks.
138 * Also make sure we allocate (and memset) all pages for final writeout.
139 */
140static void pad_wbuf(struct logfs_area *area, int final)
141{
142 pad_partial_page(area);
143 if (final)
144 pad_full_pages(area);
145}
146
147/*
140 * We have to be careful with the alias tree. Since lookup is done by bix, 148 * We have to be careful with the alias tree. Since lookup is done by bix,
141 * it needs to be normalized, so 14, 15, 16, etc. all match when dealing with 149 * it needs to be normalized, so 14, 15, 16, etc. all match when dealing with
142 * indirect blocks. So always use it through accessor functions. 150 * indirect blocks. So always use it through accessor functions.
@@ -683,7 +691,7 @@ int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow)
683 return 0; 691 return 0;
684} 692}
685 693
686static void freeseg(struct super_block *sb, u32 segno) 694void freeseg(struct super_block *sb, u32 segno)
687{ 695{
688 struct logfs_super *super = logfs_super(sb); 696 struct logfs_super *super = logfs_super(sb);
689 struct address_space *mapping = super->s_mapping_inode->i_mapping; 697 struct address_space *mapping = super->s_mapping_inode->i_mapping;
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index c66beab78dee..9d856c49afc5 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -277,7 +277,7 @@ static int logfs_recover_sb(struct super_block *sb)
277 } 277 }
278 if (valid0 && valid1 && ds_cmp(ds0, ds1)) { 278 if (valid0 && valid1 && ds_cmp(ds0, ds1)) {
279 printk(KERN_INFO"Superblocks don't match - fixing.\n"); 279 printk(KERN_INFO"Superblocks don't match - fixing.\n");
280 return write_one_sb(sb, super->s_devops->find_last_sb); 280 return logfs_write_sb(sb);
281 } 281 }
282 /* If neither is valid now, something's wrong. Didn't we properly 282 /* If neither is valid now, something's wrong. Didn't we properly
283 * check them before?!? */ 283 * check them before?!? */
@@ -289,6 +289,10 @@ static int logfs_make_writeable(struct super_block *sb)
289{ 289{
290 int err; 290 int err;
291 291
292 err = logfs_open_segfile(sb);
293 if (err)
294 return err;
295
292 /* Repair any broken superblock copies */ 296 /* Repair any broken superblock copies */
293 err = logfs_recover_sb(sb); 297 err = logfs_recover_sb(sb);
294 if (err) 298 if (err)
@@ -299,10 +303,6 @@ static int logfs_make_writeable(struct super_block *sb)
299 if (err) 303 if (err)
300 return err; 304 return err;
301 305
302 err = logfs_open_segfile(sb);
303 if (err)
304 return err;
305
306 /* Do one GC pass before any data gets dirtied */ 306 /* Do one GC pass before any data gets dirtied */
307 logfs_gc_pass(sb); 307 logfs_gc_pass(sb);
308 308
@@ -328,7 +328,7 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
328 328
329 sb->s_root = d_alloc_root(rootdir); 329 sb->s_root = d_alloc_root(rootdir);
330 if (!sb->s_root) 330 if (!sb->s_root)
331 goto fail; 331 goto fail2;
332 332
333 super->s_erase_page = alloc_pages(GFP_KERNEL, 0); 333 super->s_erase_page = alloc_pages(GFP_KERNEL, 0);
334 if (!super->s_erase_page) 334 if (!super->s_erase_page)
@@ -572,8 +572,7 @@ int logfs_get_sb_device(struct file_system_type *type, int flags,
572 return 0; 572 return 0;
573 573
574err1: 574err1:
575 up_write(&sb->s_umount); 575 deactivate_locked_super(sb);
576 deactivate_super(sb);
577 return err; 576 return err;
578err0: 577err0:
579 kfree(super); 578 kfree(super);
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 0501974bedd0..8ccf0f8c9cc8 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -30,6 +30,8 @@
30#include "alloc.h" 30#include "alloc.h"
31#include "dlmglue.h" 31#include "dlmglue.h"
32#include "file.h" 32#include "file.h"
33#include "inode.h"
34#include "journal.h"
33#include "ocfs2_fs.h" 35#include "ocfs2_fs.h"
34 36
35#include "xattr.h" 37#include "xattr.h"
@@ -166,6 +168,60 @@ static struct posix_acl *ocfs2_get_acl(struct inode *inode, int type)
166} 168}
167 169
168/* 170/*
171 * Helper function to set i_mode in memory and disk. Some call paths
172 * will not have di_bh or a journal handle to pass, in which case it
173 * will create its own.
174 */
175static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh,
176 handle_t *handle, umode_t new_mode)
177{
178 int ret, commit_handle = 0;
179 struct ocfs2_dinode *di;
180
181 if (di_bh == NULL) {
182 ret = ocfs2_read_inode_block(inode, &di_bh);
183 if (ret) {
184 mlog_errno(ret);
185 goto out;
186 }
187 } else
188 get_bh(di_bh);
189
190 if (handle == NULL) {
191 handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb),
192 OCFS2_INODE_UPDATE_CREDITS);
193 if (IS_ERR(handle)) {
194 ret = PTR_ERR(handle);
195 mlog_errno(ret);
196 goto out_brelse;
197 }
198
199 commit_handle = 1;
200 }
201
202 di = (struct ocfs2_dinode *)di_bh->b_data;
203 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
204 OCFS2_JOURNAL_ACCESS_WRITE);
205 if (ret) {
206 mlog_errno(ret);
207 goto out_commit;
208 }
209
210 inode->i_mode = new_mode;
211 di->i_mode = cpu_to_le16(inode->i_mode);
212
213 ocfs2_journal_dirty(handle, di_bh);
214
215out_commit:
216 if (commit_handle)
217 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
218out_brelse:
219 brelse(di_bh);
220out:
221 return ret;
222}
223
224/*
169 * Set the access or default ACL of an inode. 225 * Set the access or default ACL of an inode.
170 */ 226 */
171static int ocfs2_set_acl(handle_t *handle, 227static int ocfs2_set_acl(handle_t *handle,
@@ -193,9 +249,14 @@ static int ocfs2_set_acl(handle_t *handle,
193 if (ret < 0) 249 if (ret < 0)
194 return ret; 250 return ret;
195 else { 251 else {
196 inode->i_mode = mode;
197 if (ret == 0) 252 if (ret == 0)
198 acl = NULL; 253 acl = NULL;
254
255 ret = ocfs2_acl_set_mode(inode, di_bh,
256 handle, mode);
257 if (ret)
258 return ret;
259
199 } 260 }
200 } 261 }
201 break; 262 break;
@@ -283,6 +344,7 @@ int ocfs2_init_acl(handle_t *handle,
283 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 344 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
284 struct posix_acl *acl = NULL; 345 struct posix_acl *acl = NULL;
285 int ret = 0; 346 int ret = 0;
347 mode_t mode;
286 348
287 if (!S_ISLNK(inode->i_mode)) { 349 if (!S_ISLNK(inode->i_mode)) {
288 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { 350 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
@@ -291,12 +353,17 @@ int ocfs2_init_acl(handle_t *handle,
291 if (IS_ERR(acl)) 353 if (IS_ERR(acl))
292 return PTR_ERR(acl); 354 return PTR_ERR(acl);
293 } 355 }
294 if (!acl) 356 if (!acl) {
295 inode->i_mode &= ~current_umask(); 357 mode = inode->i_mode & ~current_umask();
358 ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode);
359 if (ret) {
360 mlog_errno(ret);
361 goto cleanup;
362 }
363 }
296 } 364 }
297 if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) { 365 if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) {
298 struct posix_acl *clone; 366 struct posix_acl *clone;
299 mode_t mode;
300 367
301 if (S_ISDIR(inode->i_mode)) { 368 if (S_ISDIR(inode->i_mode)) {
302 ret = ocfs2_set_acl(handle, inode, di_bh, 369 ret = ocfs2_set_acl(handle, inode, di_bh,
@@ -313,7 +380,7 @@ int ocfs2_init_acl(handle_t *handle,
313 mode = inode->i_mode; 380 mode = inode->i_mode;
314 ret = posix_acl_create_masq(clone, &mode); 381 ret = posix_acl_create_masq(clone, &mode);
315 if (ret >= 0) { 382 if (ret >= 0) {
316 inode->i_mode = mode; 383 ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode);
317 if (ret > 0) { 384 if (ret > 0) {
318 ret = ocfs2_set_acl(handle, inode, 385 ret = ocfs2_set_acl(handle, inode,
319 di_bh, ACL_TYPE_ACCESS, 386 di_bh, ACL_TYPE_ACCESS,
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index a659606dcb95..9289b4357d27 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -1875,7 +1875,6 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
1875ok: 1875ok:
1876 spin_unlock(&res->spinlock); 1876 spin_unlock(&res->spinlock);
1877 } 1877 }
1878 spin_unlock(&dlm->spinlock);
1879 1878
1880 // mlog(0, "woo! got an assert_master from node %u!\n", 1879 // mlog(0, "woo! got an assert_master from node %u!\n",
1881 // assert->node_idx); 1880 // assert->node_idx);
@@ -1926,7 +1925,6 @@ ok:
1926 /* master is known, detach if not already detached. 1925 /* master is known, detach if not already detached.
1927 * ensures that only one assert_master call will happen 1926 * ensures that only one assert_master call will happen
1928 * on this mle. */ 1927 * on this mle. */
1929 spin_lock(&dlm->spinlock);
1930 spin_lock(&dlm->master_lock); 1928 spin_lock(&dlm->master_lock);
1931 1929
1932 rr = atomic_read(&mle->mle_refs.refcount); 1930 rr = atomic_read(&mle->mle_refs.refcount);
@@ -1959,7 +1957,6 @@ ok:
1959 __dlm_put_mle(mle); 1957 __dlm_put_mle(mle);
1960 } 1958 }
1961 spin_unlock(&dlm->master_lock); 1959 spin_unlock(&dlm->master_lock);
1962 spin_unlock(&dlm->spinlock);
1963 } else if (res) { 1960 } else if (res) {
1964 if (res->owner != assert->node_idx) { 1961 if (res->owner != assert->node_idx) {
1965 mlog(0, "assert_master from %u, but current " 1962 mlog(0, "assert_master from %u, but current "
@@ -1967,6 +1964,7 @@ ok:
1967 res->owner, namelen, name); 1964 res->owner, namelen, name);
1968 } 1965 }
1969 } 1966 }
1967 spin_unlock(&dlm->spinlock);
1970 1968
1971done: 1969done:
1972 ret = 0; 1970 ret = 0;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 278a223aae14..ab207901d32a 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -891,6 +891,21 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
891 /* Do some basic inode verification... */ 891 /* Do some basic inode verification... */
892 di = (struct ocfs2_dinode *) di_bh->b_data; 892 di = (struct ocfs2_dinode *) di_bh->b_data;
893 if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) { 893 if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) {
894 /*
895 * Inodes in the orphan dir must have ORPHANED_FL. The only
896 * inodes that come back out of the orphan dir are reflink
897 * targets. A reflink target may be moved out of the orphan
898 * dir between the time we scan the directory and the time we
899 * process it. This would lead to HAS_REFCOUNT_FL being set but
900 * ORPHANED_FL not.
901 */
902 if (di->i_dyn_features & cpu_to_le16(OCFS2_HAS_REFCOUNT_FL)) {
903 mlog(0, "Reflinked inode %llu is no longer orphaned. "
904 "it shouldn't be deleted\n",
905 (unsigned long long)oi->ip_blkno);
906 goto bail;
907 }
908
894 /* for lack of a better error? */ 909 /* for lack of a better error? */
895 status = -EEXIST; 910 status = -EEXIST;
896 mlog(ML_ERROR, 911 mlog(ML_ERROR,
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index ca992d91f511..c983715d8d8c 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -872,8 +872,10 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
872 (unsigned long long)la_start_blk, 872 (unsigned long long)la_start_blk,
873 (unsigned long long)blkno); 873 (unsigned long long)blkno);
874 874
875 status = ocfs2_free_clusters(handle, main_bm_inode, 875 status = ocfs2_release_clusters(handle,
876 main_bm_bh, blkno, count); 876 main_bm_inode,
877 main_bm_bh, blkno,
878 count);
877 if (status < 0) { 879 if (status < 0) {
878 mlog_errno(status); 880 mlog_errno(status);
879 goto bail; 881 goto bail;
@@ -984,8 +986,7 @@ static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
984 } 986 }
985 987
986retry_enospc: 988retry_enospc:
987 (*ac)->ac_bits_wanted = osb->local_alloc_bits; 989 (*ac)->ac_bits_wanted = osb->local_alloc_default_bits;
988
989 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); 990 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
990 if (status == -ENOSPC) { 991 if (status == -ENOSPC) {
991 if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == 992 if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) ==
@@ -1061,6 +1062,7 @@ retry_enospc:
1061 OCFS2_LA_DISABLED) 1062 OCFS2_LA_DISABLED)
1062 goto bail; 1063 goto bail;
1063 1064
1065 ac->ac_bits_wanted = osb->local_alloc_default_bits;
1064 status = ocfs2_claim_clusters(osb, handle, ac, 1066 status = ocfs2_claim_clusters(osb, handle, ac,
1065 osb->local_alloc_bits, 1067 osb->local_alloc_bits,
1066 &cluster_off, 1068 &cluster_off,
diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c
index 544ac6245175..b5cb3ede9408 100644
--- a/fs/ocfs2/locks.c
+++ b/fs/ocfs2/locks.c
@@ -133,7 +133,7 @@ int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl)
133 133
134 if (!(fl->fl_flags & FL_POSIX)) 134 if (!(fl->fl_flags & FL_POSIX))
135 return -ENOLCK; 135 return -ENOLCK;
136 if (__mandatory_lock(inode)) 136 if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
137 return -ENOLCK; 137 return -ENOLCK;
138 138
139 return ocfs2_plock(osb->cconn, OCFS2_I(inode)->ip_blkno, file, cmd, fl); 139 return ocfs2_plock(osb->cconn, OCFS2_I(inode)->ip_blkno, file, cmd, fl);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index d9cd4e373a53..b1eb50ae4097 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -84,7 +84,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
84static int ocfs2_orphan_add(struct ocfs2_super *osb, 84static int ocfs2_orphan_add(struct ocfs2_super *osb,
85 handle_t *handle, 85 handle_t *handle,
86 struct inode *inode, 86 struct inode *inode,
87 struct ocfs2_dinode *fe, 87 struct buffer_head *fe_bh,
88 char *name, 88 char *name,
89 struct ocfs2_dir_lookup_result *lookup, 89 struct ocfs2_dir_lookup_result *lookup,
90 struct inode *orphan_dir_inode); 90 struct inode *orphan_dir_inode);
@@ -879,7 +879,7 @@ static int ocfs2_unlink(struct inode *dir,
879 fe = (struct ocfs2_dinode *) fe_bh->b_data; 879 fe = (struct ocfs2_dinode *) fe_bh->b_data;
880 880
881 if (inode_is_unlinkable(inode)) { 881 if (inode_is_unlinkable(inode)) {
882 status = ocfs2_orphan_add(osb, handle, inode, fe, orphan_name, 882 status = ocfs2_orphan_add(osb, handle, inode, fe_bh, orphan_name,
883 &orphan_insert, orphan_dir); 883 &orphan_insert, orphan_dir);
884 if (status < 0) { 884 if (status < 0) {
885 mlog_errno(status); 885 mlog_errno(status);
@@ -1300,7 +1300,7 @@ static int ocfs2_rename(struct inode *old_dir,
1300 if (S_ISDIR(new_inode->i_mode) || 1300 if (S_ISDIR(new_inode->i_mode) ||
1301 (ocfs2_read_links_count(newfe) == 1)) { 1301 (ocfs2_read_links_count(newfe) == 1)) {
1302 status = ocfs2_orphan_add(osb, handle, new_inode, 1302 status = ocfs2_orphan_add(osb, handle, new_inode,
1303 newfe, orphan_name, 1303 newfe_bh, orphan_name,
1304 &orphan_insert, orphan_dir); 1304 &orphan_insert, orphan_dir);
1305 if (status < 0) { 1305 if (status < 0) {
1306 mlog_errno(status); 1306 mlog_errno(status);
@@ -1911,7 +1911,7 @@ leave:
1911static int ocfs2_orphan_add(struct ocfs2_super *osb, 1911static int ocfs2_orphan_add(struct ocfs2_super *osb,
1912 handle_t *handle, 1912 handle_t *handle,
1913 struct inode *inode, 1913 struct inode *inode,
1914 struct ocfs2_dinode *fe, 1914 struct buffer_head *fe_bh,
1915 char *name, 1915 char *name,
1916 struct ocfs2_dir_lookup_result *lookup, 1916 struct ocfs2_dir_lookup_result *lookup,
1917 struct inode *orphan_dir_inode) 1917 struct inode *orphan_dir_inode)
@@ -1919,6 +1919,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
1919 struct buffer_head *orphan_dir_bh = NULL; 1919 struct buffer_head *orphan_dir_bh = NULL;
1920 int status = 0; 1920 int status = 0;
1921 struct ocfs2_dinode *orphan_fe; 1921 struct ocfs2_dinode *orphan_fe;
1922 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
1922 1923
1923 mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); 1924 mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
1924 1925
@@ -1959,6 +1960,21 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
1959 goto leave; 1960 goto leave;
1960 } 1961 }
1961 1962
1963 /*
1964 * We're going to journal the change of i_flags and i_orphaned_slot.
1965 * It's safe anyway, though some callers may duplicate the journaling.
1966 * Journaling within the func just make the logic look more
1967 * straightforward.
1968 */
1969 status = ocfs2_journal_access_di(handle,
1970 INODE_CACHE(inode),
1971 fe_bh,
1972 OCFS2_JOURNAL_ACCESS_WRITE);
1973 if (status < 0) {
1974 mlog_errno(status);
1975 goto leave;
1976 }
1977
1962 le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL); 1978 le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL);
1963 1979
1964 /* Record which orphan dir our inode now resides 1980 /* Record which orphan dir our inode now resides
@@ -1966,6 +1982,8 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
1966 * dir to lock. */ 1982 * dir to lock. */
1967 fe->i_orphaned_slot = cpu_to_le16(osb->slot_num); 1983 fe->i_orphaned_slot = cpu_to_le16(osb->slot_num);
1968 1984
1985 ocfs2_journal_dirty(handle, fe_bh);
1986
1969 mlog(0, "Inode %llu orphaned in slot %d\n", 1987 mlog(0, "Inode %llu orphaned in slot %d\n",
1970 (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num); 1988 (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num);
1971 1989
@@ -2123,7 +2141,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
2123 } 2141 }
2124 2142
2125 di = (struct ocfs2_dinode *)new_di_bh->b_data; 2143 di = (struct ocfs2_dinode *)new_di_bh->b_data;
2126 status = ocfs2_orphan_add(osb, handle, inode, di, orphan_name, 2144 status = ocfs2_orphan_add(osb, handle, inode, new_di_bh, orphan_name,
2127 &orphan_insert, orphan_dir); 2145 &orphan_insert, orphan_dir);
2128 if (status < 0) { 2146 if (status < 0) {
2129 mlog_errno(status); 2147 mlog_errno(status);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 1238b491db90..adf5e2ebc2c4 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -763,8 +763,18 @@ static inline unsigned int ocfs2_megabytes_to_clusters(struct super_block *sb,
763 return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits); 763 return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits);
764} 764}
765 765
766#define ocfs2_set_bit ext2_set_bit 766static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap)
767#define ocfs2_clear_bit ext2_clear_bit 767{
768 ext2_set_bit(bit, bitmap);
769}
770#define ocfs2_set_bit(bit, addr) _ocfs2_set_bit((bit), (unsigned long *)(addr))
771
772static inline void _ocfs2_clear_bit(unsigned int bit, unsigned long *bitmap)
773{
774 ext2_clear_bit(bit, bitmap);
775}
776#define ocfs2_clear_bit(bit, addr) _ocfs2_clear_bit((bit), (unsigned long *)(addr))
777
768#define ocfs2_test_bit ext2_test_bit 778#define ocfs2_test_bit ext2_test_bit
769#define ocfs2_find_next_zero_bit ext2_find_next_zero_bit 779#define ocfs2_find_next_zero_bit ext2_find_next_zero_bit
770#define ocfs2_find_next_bit ext2_find_next_bit 780#define ocfs2_find_next_bit ext2_find_next_bit
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 9e96921dffda..29405f2ff616 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4075,6 +4075,7 @@ static int ocfs2_complete_reflink(struct inode *s_inode,
4075 OCFS2_I(t_inode)->ip_dyn_features = OCFS2_I(s_inode)->ip_dyn_features; 4075 OCFS2_I(t_inode)->ip_dyn_features = OCFS2_I(s_inode)->ip_dyn_features;
4076 spin_unlock(&OCFS2_I(t_inode)->ip_lock); 4076 spin_unlock(&OCFS2_I(t_inode)->ip_lock);
4077 i_size_write(t_inode, size); 4077 i_size_write(t_inode, size);
4078 t_inode->i_blocks = s_inode->i_blocks;
4078 4079
4079 di->i_xattr_inline_size = s_di->i_xattr_inline_size; 4080 di->i_xattr_inline_size = s_di->i_xattr_inline_size;
4080 di->i_clusters = s_di->i_clusters; 4081 di->i_clusters = s_di->i_clusters;
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index c3c60bc3e072..19ba00f28547 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -95,13 +95,6 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
95 struct buffer_head *group_bh, 95 struct buffer_head *group_bh,
96 unsigned int bit_off, 96 unsigned int bit_off,
97 unsigned int num_bits); 97 unsigned int num_bits);
98static inline int ocfs2_block_group_clear_bits(handle_t *handle,
99 struct inode *alloc_inode,
100 struct ocfs2_group_desc *bg,
101 struct buffer_head *group_bh,
102 unsigned int bit_off,
103 unsigned int num_bits);
104
105static int ocfs2_relink_block_group(handle_t *handle, 98static int ocfs2_relink_block_group(handle_t *handle,
106 struct inode *alloc_inode, 99 struct inode *alloc_inode,
107 struct buffer_head *fe_bh, 100 struct buffer_head *fe_bh,
@@ -152,7 +145,7 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
152 145
153#define do_error(fmt, ...) \ 146#define do_error(fmt, ...) \
154 do{ \ 147 do{ \
155 if (clean_error) \ 148 if (resize) \
156 mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__); \ 149 mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__); \
157 else \ 150 else \
158 ocfs2_error(sb, fmt, ##__VA_ARGS__); \ 151 ocfs2_error(sb, fmt, ##__VA_ARGS__); \
@@ -160,7 +153,7 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
160 153
161static int ocfs2_validate_gd_self(struct super_block *sb, 154static int ocfs2_validate_gd_self(struct super_block *sb,
162 struct buffer_head *bh, 155 struct buffer_head *bh,
163 int clean_error) 156 int resize)
164{ 157{
165 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; 158 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
166 159
@@ -211,7 +204,7 @@ static int ocfs2_validate_gd_self(struct super_block *sb,
211static int ocfs2_validate_gd_parent(struct super_block *sb, 204static int ocfs2_validate_gd_parent(struct super_block *sb,
212 struct ocfs2_dinode *di, 205 struct ocfs2_dinode *di,
213 struct buffer_head *bh, 206 struct buffer_head *bh,
214 int clean_error) 207 int resize)
215{ 208{
216 unsigned int max_bits; 209 unsigned int max_bits;
217 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; 210 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
@@ -233,8 +226,11 @@ static int ocfs2_validate_gd_parent(struct super_block *sb,
233 return -EINVAL; 226 return -EINVAL;
234 } 227 }
235 228
236 if (le16_to_cpu(gd->bg_chain) >= 229 /* In resize, we may meet the case bg_chain == cl_next_free_rec. */
237 le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) { 230 if ((le16_to_cpu(gd->bg_chain) >
231 le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) ||
232 ((le16_to_cpu(gd->bg_chain) ==
233 le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) && !resize)) {
238 do_error("Group descriptor #%llu has bad chain %u", 234 do_error("Group descriptor #%llu has bad chain %u",
239 (unsigned long long)bh->b_blocknr, 235 (unsigned long long)bh->b_blocknr,
240 le16_to_cpu(gd->bg_chain)); 236 le16_to_cpu(gd->bg_chain));
@@ -1975,18 +1971,18 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
1975 bits_wanted, cluster_start, num_clusters); 1971 bits_wanted, cluster_start, num_clusters);
1976} 1972}
1977 1973
1978static inline int ocfs2_block_group_clear_bits(handle_t *handle, 1974static int ocfs2_block_group_clear_bits(handle_t *handle,
1979 struct inode *alloc_inode, 1975 struct inode *alloc_inode,
1980 struct ocfs2_group_desc *bg, 1976 struct ocfs2_group_desc *bg,
1981 struct buffer_head *group_bh, 1977 struct buffer_head *group_bh,
1982 unsigned int bit_off, 1978 unsigned int bit_off,
1983 unsigned int num_bits) 1979 unsigned int num_bits,
1980 void (*undo_fn)(unsigned int bit,
1981 unsigned long *bmap))
1984{ 1982{
1985 int status; 1983 int status;
1986 unsigned int tmp; 1984 unsigned int tmp;
1987 int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
1988 struct ocfs2_group_desc *undo_bg = NULL; 1985 struct ocfs2_group_desc *undo_bg = NULL;
1989 int cluster_bitmap = 0;
1990 1986
1991 mlog_entry_void(); 1987 mlog_entry_void();
1992 1988
@@ -1996,20 +1992,18 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
1996 1992
1997 mlog(0, "off = %u, num = %u\n", bit_off, num_bits); 1993 mlog(0, "off = %u, num = %u\n", bit_off, num_bits);
1998 1994
1999 if (ocfs2_is_cluster_bitmap(alloc_inode)) 1995 BUG_ON(undo_fn && !ocfs2_is_cluster_bitmap(alloc_inode));
2000 journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
2001
2002 status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), 1996 status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
2003 group_bh, journal_type); 1997 group_bh,
1998 undo_fn ?
1999 OCFS2_JOURNAL_ACCESS_UNDO :
2000 OCFS2_JOURNAL_ACCESS_WRITE);
2004 if (status < 0) { 2001 if (status < 0) {
2005 mlog_errno(status); 2002 mlog_errno(status);
2006 goto bail; 2003 goto bail;
2007 } 2004 }
2008 2005
2009 if (ocfs2_is_cluster_bitmap(alloc_inode)) 2006 if (undo_fn) {
2010 cluster_bitmap = 1;
2011
2012 if (cluster_bitmap) {
2013 jbd_lock_bh_state(group_bh); 2007 jbd_lock_bh_state(group_bh);
2014 undo_bg = (struct ocfs2_group_desc *) 2008 undo_bg = (struct ocfs2_group_desc *)
2015 bh2jh(group_bh)->b_committed_data; 2009 bh2jh(group_bh)->b_committed_data;
@@ -2020,13 +2014,13 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
2020 while(tmp--) { 2014 while(tmp--) {
2021 ocfs2_clear_bit((bit_off + tmp), 2015 ocfs2_clear_bit((bit_off + tmp),
2022 (unsigned long *) bg->bg_bitmap); 2016 (unsigned long *) bg->bg_bitmap);
2023 if (cluster_bitmap) 2017 if (undo_fn)
2024 ocfs2_set_bit(bit_off + tmp, 2018 undo_fn(bit_off + tmp,
2025 (unsigned long *) undo_bg->bg_bitmap); 2019 (unsigned long *) undo_bg->bg_bitmap);
2026 } 2020 }
2027 le16_add_cpu(&bg->bg_free_bits_count, num_bits); 2021 le16_add_cpu(&bg->bg_free_bits_count, num_bits);
2028 2022
2029 if (cluster_bitmap) 2023 if (undo_fn)
2030 jbd_unlock_bh_state(group_bh); 2024 jbd_unlock_bh_state(group_bh);
2031 2025
2032 status = ocfs2_journal_dirty(handle, group_bh); 2026 status = ocfs2_journal_dirty(handle, group_bh);
@@ -2039,12 +2033,14 @@ bail:
2039/* 2033/*
2040 * expects the suballoc inode to already be locked. 2034 * expects the suballoc inode to already be locked.
2041 */ 2035 */
2042int ocfs2_free_suballoc_bits(handle_t *handle, 2036static int _ocfs2_free_suballoc_bits(handle_t *handle,
2043 struct inode *alloc_inode, 2037 struct inode *alloc_inode,
2044 struct buffer_head *alloc_bh, 2038 struct buffer_head *alloc_bh,
2045 unsigned int start_bit, 2039 unsigned int start_bit,
2046 u64 bg_blkno, 2040 u64 bg_blkno,
2047 unsigned int count) 2041 unsigned int count,
2042 void (*undo_fn)(unsigned int bit,
2043 unsigned long *bitmap))
2048{ 2044{
2049 int status = 0; 2045 int status = 0;
2050 u32 tmp_used; 2046 u32 tmp_used;
@@ -2079,7 +2075,7 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
2079 2075
2080 status = ocfs2_block_group_clear_bits(handle, alloc_inode, 2076 status = ocfs2_block_group_clear_bits(handle, alloc_inode,
2081 group, group_bh, 2077 group, group_bh,
2082 start_bit, count); 2078 start_bit, count, undo_fn);
2083 if (status < 0) { 2079 if (status < 0) {
2084 mlog_errno(status); 2080 mlog_errno(status);
2085 goto bail; 2081 goto bail;
@@ -2110,6 +2106,17 @@ bail:
2110 return status; 2106 return status;
2111} 2107}
2112 2108
2109int ocfs2_free_suballoc_bits(handle_t *handle,
2110 struct inode *alloc_inode,
2111 struct buffer_head *alloc_bh,
2112 unsigned int start_bit,
2113 u64 bg_blkno,
2114 unsigned int count)
2115{
2116 return _ocfs2_free_suballoc_bits(handle, alloc_inode, alloc_bh,
2117 start_bit, bg_blkno, count, NULL);
2118}
2119
2113int ocfs2_free_dinode(handle_t *handle, 2120int ocfs2_free_dinode(handle_t *handle,
2114 struct inode *inode_alloc_inode, 2121 struct inode *inode_alloc_inode,
2115 struct buffer_head *inode_alloc_bh, 2122 struct buffer_head *inode_alloc_bh,
@@ -2123,11 +2130,13 @@ int ocfs2_free_dinode(handle_t *handle,
2123 inode_alloc_bh, bit, bg_blkno, 1); 2130 inode_alloc_bh, bit, bg_blkno, 1);
2124} 2131}
2125 2132
2126int ocfs2_free_clusters(handle_t *handle, 2133static int _ocfs2_free_clusters(handle_t *handle,
2127 struct inode *bitmap_inode, 2134 struct inode *bitmap_inode,
2128 struct buffer_head *bitmap_bh, 2135 struct buffer_head *bitmap_bh,
2129 u64 start_blk, 2136 u64 start_blk,
2130 unsigned int num_clusters) 2137 unsigned int num_clusters,
2138 void (*undo_fn)(unsigned int bit,
2139 unsigned long *bitmap))
2131{ 2140{
2132 int status; 2141 int status;
2133 u16 bg_start_bit; 2142 u16 bg_start_bit;
@@ -2154,9 +2163,9 @@ int ocfs2_free_clusters(handle_t *handle,
2154 mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n", 2163 mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n",
2155 (unsigned long long)bg_blkno, bg_start_bit); 2164 (unsigned long long)bg_blkno, bg_start_bit);
2156 2165
2157 status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, 2166 status = _ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
2158 bg_start_bit, bg_blkno, 2167 bg_start_bit, bg_blkno,
2159 num_clusters); 2168 num_clusters, undo_fn);
2160 if (status < 0) { 2169 if (status < 0) {
2161 mlog_errno(status); 2170 mlog_errno(status);
2162 goto out; 2171 goto out;
@@ -2170,6 +2179,32 @@ out:
2170 return status; 2179 return status;
2171} 2180}
2172 2181
2182int ocfs2_free_clusters(handle_t *handle,
2183 struct inode *bitmap_inode,
2184 struct buffer_head *bitmap_bh,
2185 u64 start_blk,
2186 unsigned int num_clusters)
2187{
2188 return _ocfs2_free_clusters(handle, bitmap_inode, bitmap_bh,
2189 start_blk, num_clusters,
2190 _ocfs2_set_bit);
2191}
2192
2193/*
2194 * Give never-used clusters back to the global bitmap. We don't need
2195 * to protect these bits in the undo buffer.
2196 */
2197int ocfs2_release_clusters(handle_t *handle,
2198 struct inode *bitmap_inode,
2199 struct buffer_head *bitmap_bh,
2200 u64 start_blk,
2201 unsigned int num_clusters)
2202{
2203 return _ocfs2_free_clusters(handle, bitmap_inode, bitmap_bh,
2204 start_blk, num_clusters,
2205 _ocfs2_clear_bit);
2206}
2207
2173static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg) 2208static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg)
2174{ 2209{
2175 printk("Block Group:\n"); 2210 printk("Block Group:\n");
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index fa60723c43e8..e0f46df357e6 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -127,6 +127,11 @@ int ocfs2_free_clusters(handle_t *handle,
127 struct buffer_head *bitmap_bh, 127 struct buffer_head *bitmap_bh,
128 u64 start_blk, 128 u64 start_blk,
129 unsigned int num_clusters); 129 unsigned int num_clusters);
130int ocfs2_release_clusters(handle_t *handle,
131 struct inode *bitmap_inode,
132 struct buffer_head *bitmap_bh,
133 u64 start_blk,
134 unsigned int num_clusters);
130 135
131static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit) 136static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit)
132{ 137{
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index d1b0d386f6d1..3e7773089b96 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1622,7 +1622,7 @@ static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc)
1622 /* Now tell xh->xh_entries about it */ 1622 /* Now tell xh->xh_entries about it */
1623 for (i = 0; i < count; i++) { 1623 for (i = 0; i < count; i++) {
1624 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); 1624 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1625 if (offset < namevalue_offset) 1625 if (offset <= namevalue_offset)
1626 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, 1626 le16_add_cpu(&xh->xh_entries[i].xe_name_offset,
1627 namevalue_size); 1627 namevalue_size);
1628 } 1628 }
@@ -6528,13 +6528,11 @@ static int ocfs2_create_empty_xattr_block(struct inode *inode,
6528 int indexed) 6528 int indexed)
6529{ 6529{
6530 int ret; 6530 int ret;
6531 struct ocfs2_alloc_context *meta_ac;
6532 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 6531 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6533 struct ocfs2_xattr_set_ctxt ctxt = { 6532 struct ocfs2_xattr_set_ctxt ctxt;
6534 .meta_ac = meta_ac,
6535 };
6536 6533
6537 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac); 6534 memset(&ctxt, 0, sizeof(ctxt));
6535 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac);
6538 if (ret < 0) { 6536 if (ret < 0) {
6539 mlog_errno(ret); 6537 mlog_errno(ret);
6540 return ret; 6538 return ret;
@@ -6556,7 +6554,7 @@ static int ocfs2_create_empty_xattr_block(struct inode *inode,
6556 6554
6557 ocfs2_commit_trans(osb, ctxt.handle); 6555 ocfs2_commit_trans(osb, ctxt.handle);
6558out: 6556out:
6559 ocfs2_free_alloc_context(meta_ac); 6557 ocfs2_free_alloc_context(ctxt.meta_ac);
6560 return ret; 6558 return ret;
6561} 6559}
6562 6560
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a7310841c831..b1f6e62773d3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -442,12 +442,13 @@ static const struct file_operations proc_lstats_operations = {
442unsigned long badness(struct task_struct *p, unsigned long uptime); 442unsigned long badness(struct task_struct *p, unsigned long uptime);
443static int proc_oom_score(struct task_struct *task, char *buffer) 443static int proc_oom_score(struct task_struct *task, char *buffer)
444{ 444{
445 unsigned long points; 445 unsigned long points = 0;
446 struct timespec uptime; 446 struct timespec uptime;
447 447
448 do_posix_clock_monotonic_gettime(&uptime); 448 do_posix_clock_monotonic_gettime(&uptime);
449 read_lock(&tasklist_lock); 449 read_lock(&tasklist_lock);
450 points = badness(task->group_leader, uptime.tv_sec); 450 if (pid_alive(task))
451 points = badness(task, uptime.tv_sec);
451 read_unlock(&tasklist_lock); 452 read_unlock(&tasklist_lock);
452 return sprintf(buffer, "%lu\n", points); 453 return sprintf(buffer, "%lu\n", points);
453} 454}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 183f8ff5f400..096273984c3b 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -406,6 +406,7 @@ static int show_smap(struct seq_file *m, void *v)
406 406
407 memset(&mss, 0, sizeof mss); 407 memset(&mss, 0, sizeof mss);
408 mss.vma = vma; 408 mss.vma = vma;
409 /* mmap_sem is held in m_start */
409 if (vma->vm_mm && !is_vm_hugetlb_page(vma)) 410 if (vma->vm_mm && !is_vm_hugetlb_page(vma))
410 walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); 411 walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
411 412
@@ -552,7 +553,8 @@ const struct file_operations proc_clear_refs_operations = {
552}; 553};
553 554
554struct pagemapread { 555struct pagemapread {
555 u64 __user *out, *end; 556 int pos, len;
557 u64 *buffer;
556}; 558};
557 559
558#define PM_ENTRY_BYTES sizeof(u64) 560#define PM_ENTRY_BYTES sizeof(u64)
@@ -575,10 +577,8 @@ struct pagemapread {
575static int add_to_pagemap(unsigned long addr, u64 pfn, 577static int add_to_pagemap(unsigned long addr, u64 pfn,
576 struct pagemapread *pm) 578 struct pagemapread *pm)
577{ 579{
578 if (put_user(pfn, pm->out)) 580 pm->buffer[pm->pos++] = pfn;
579 return -EFAULT; 581 if (pm->pos >= pm->len)
580 pm->out++;
581 if (pm->out >= pm->end)
582 return PM_END_OF_BUFFER; 582 return PM_END_OF_BUFFER;
583 return 0; 583 return 0;
584} 584}
@@ -720,21 +720,20 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long addr,
720 * determine which areas of memory are actually mapped and llseek to 720 * determine which areas of memory are actually mapped and llseek to
721 * skip over unmapped regions. 721 * skip over unmapped regions.
722 */ 722 */
723#define PAGEMAP_WALK_SIZE (PMD_SIZE)
723static ssize_t pagemap_read(struct file *file, char __user *buf, 724static ssize_t pagemap_read(struct file *file, char __user *buf,
724 size_t count, loff_t *ppos) 725 size_t count, loff_t *ppos)
725{ 726{
726 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 727 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
727 struct page **pages, *page;
728 unsigned long uaddr, uend;
729 struct mm_struct *mm; 728 struct mm_struct *mm;
730 struct pagemapread pm; 729 struct pagemapread pm;
731 int pagecount;
732 int ret = -ESRCH; 730 int ret = -ESRCH;
733 struct mm_walk pagemap_walk = {}; 731 struct mm_walk pagemap_walk = {};
734 unsigned long src; 732 unsigned long src;
735 unsigned long svpfn; 733 unsigned long svpfn;
736 unsigned long start_vaddr; 734 unsigned long start_vaddr;
737 unsigned long end_vaddr; 735 unsigned long end_vaddr;
736 int copied = 0;
738 737
739 if (!task) 738 if (!task)
740 goto out; 739 goto out;
@@ -757,35 +756,12 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
757 if (!mm) 756 if (!mm)
758 goto out_task; 757 goto out_task;
759 758
760 759 pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
761 uaddr = (unsigned long)buf & PAGE_MASK; 760 pm.buffer = kmalloc(pm.len, GFP_TEMPORARY);
762 uend = (unsigned long)(buf + count);
763 pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE;
764 ret = 0;
765 if (pagecount == 0)
766 goto out_mm;
767 pages = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
768 ret = -ENOMEM; 761 ret = -ENOMEM;
769 if (!pages) 762 if (!pm.buffer)
770 goto out_mm; 763 goto out_mm;
771 764
772 down_read(&current->mm->mmap_sem);
773 ret = get_user_pages(current, current->mm, uaddr, pagecount,
774 1, 0, pages, NULL);
775 up_read(&current->mm->mmap_sem);
776
777 if (ret < 0)
778 goto out_free;
779
780 if (ret != pagecount) {
781 pagecount = ret;
782 ret = -EFAULT;
783 goto out_pages;
784 }
785
786 pm.out = (u64 __user *)buf;
787 pm.end = (u64 __user *)(buf + count);
788
789 pagemap_walk.pmd_entry = pagemap_pte_range; 765 pagemap_walk.pmd_entry = pagemap_pte_range;
790 pagemap_walk.pte_hole = pagemap_pte_hole; 766 pagemap_walk.pte_hole = pagemap_pte_hole;
791 pagemap_walk.hugetlb_entry = pagemap_hugetlb_range; 767 pagemap_walk.hugetlb_entry = pagemap_hugetlb_range;
@@ -807,23 +783,36 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
807 * user buffer is tracked in "pm", and the walk 783 * user buffer is tracked in "pm", and the walk
808 * will stop when we hit the end of the buffer. 784 * will stop when we hit the end of the buffer.
809 */ 785 */
810 ret = walk_page_range(start_vaddr, end_vaddr, &pagemap_walk); 786 ret = 0;
811 if (ret == PM_END_OF_BUFFER) 787 while (count && (start_vaddr < end_vaddr)) {
812 ret = 0; 788 int len;
813 /* don't need mmap_sem for these, but this looks cleaner */ 789 unsigned long end;
814 *ppos += (char __user *)pm.out - buf; 790
815 if (!ret) 791 pm.pos = 0;
816 ret = (char __user *)pm.out - buf; 792 end = start_vaddr + PAGEMAP_WALK_SIZE;
817 793 /* overflow ? */
818out_pages: 794 if (end < start_vaddr || end > end_vaddr)
819 for (; pagecount; pagecount--) { 795 end = end_vaddr;
820 page = pages[pagecount-1]; 796 down_read(&mm->mmap_sem);
821 if (!PageReserved(page)) 797 ret = walk_page_range(start_vaddr, end, &pagemap_walk);
822 SetPageDirty(page); 798 up_read(&mm->mmap_sem);
823 page_cache_release(page); 799 start_vaddr = end;
800
801 len = min(count, PM_ENTRY_BYTES * pm.pos);
802 if (copy_to_user(buf, pm.buffer, len) < 0) {
803 ret = -EFAULT;
804 goto out_free;
805 }
806 copied += len;
807 buf += len;
808 count -= len;
824 } 809 }
810 *ppos += copied;
811 if (!ret || ret == PM_END_OF_BUFFER)
812 ret = copied;
813
825out_free: 814out_free:
826 kfree(pages); 815 kfree(pm.buffer);
827out_mm: 816out_mm:
828 mmput(mm); 817 mmput(mm);
829out_task: 818out_task:
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 04bf5d791bda..ab190511bc18 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1618,10 +1618,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1618 save_mount_options(s, data); 1618 save_mount_options(s, data);
1619 1619
1620 sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL); 1620 sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL);
1621 if (!sbi) { 1621 if (!sbi)
1622 errval = -ENOMEM; 1622 return -ENOMEM;
1623 goto error_alloc;
1624 }
1625 s->s_fs_info = sbi; 1623 s->s_fs_info = sbi;
1626 /* Set default values for options: non-aggressive tails, RO on errors */ 1624 /* Set default values for options: non-aggressive tails, RO on errors */
1627 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL); 1625 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL);
@@ -1878,12 +1876,12 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1878 return (0); 1876 return (0);
1879 1877
1880error: 1878error:
1881 reiserfs_write_unlock(s);
1882error_alloc:
1883 if (jinit_done) { /* kill the commit thread, free journal ram */ 1879 if (jinit_done) { /* kill the commit thread, free journal ram */
1884 journal_release_error(NULL, s); 1880 journal_release_error(NULL, s);
1885 } 1881 }
1886 1882
1883 reiserfs_write_unlock(s);
1884
1887 reiserfs_free_bitmap_cache(s); 1885 reiserfs_free_bitmap_cache(s);
1888 if (SB_BUFFER_WITH_SB(s)) 1886 if (SB_BUFFER_WITH_SB(s))
1889 brelse(SB_BUFFER_WITH_SB(s)); 1887 brelse(SB_BUFFER_WITH_SB(s));