aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Greg Kroah-Hartman <gregkh@suse.de> 2006-08-15 15:31:36 -0400
committer: Greg Kroah-Hartman <gregkh@suse.de> 2006-08-15 15:31:36 -0400
commit: 80914d97aa0c61fbfd8eadd120c863b6229f0809 (patch)
tree: 4d4d08a8ccbfcdabb40cc457badfd053056a7bdd /fs
parent: 74361cb6828398a96167b3234e186fbd731e5f30 (diff)
parent: 883d4cae4a2b01a05193cf2665c77b7489a8b6a0 (diff)
Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2
Diffstat (limited to 'fs')
-rw-r--r-- fs/ocfs2/dlm/dlmmaster.c 1
-rw-r--r-- fs/ocfs2/dlm/dlmunlock.c 43
-rw-r--r-- fs/ocfs2/localalloc.c 8
-rw-r--r-- fs/ocfs2/ocfs2.h 2
-rw-r--r-- fs/ocfs2/suballoc.c 261
-rw-r--r-- fs/ocfs2/suballoc.h 2
-rw-r--r-- fs/ocfs2/super.c 8
7 files changed, 263 insertions, 62 deletions
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 1b8346dd0572..9503240ef0e5 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2375,7 +2375,6 @@ leave:
2375 mlog(0, "returning %d\n", ret); 2375 mlog(0, "returning %d\n", ret);
2376 return ret; 2376 return ret;
2377} 2377}
2378EXPORT_SYMBOL_GPL(dlm_migrate_lockres);
2379 2378
2380int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock) 2379int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock)
2381{ 2380{
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index b0c3134f4f70..37be4b2e0d4a 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -155,7 +155,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
155 else 155 else
156 status = dlm_get_unlock_actions(dlm, res, lock, lksb, &actions); 156 status = dlm_get_unlock_actions(dlm, res, lock, lksb, &actions);
157 157
158 if (status != DLM_NORMAL) 158 if (status != DLM_NORMAL && (status != DLM_CANCELGRANT || !master_node))
159 goto leave; 159 goto leave;
160 160
161 /* By now this has been masked out of cancel requests. */ 161 /* By now this has been masked out of cancel requests. */
@@ -183,8 +183,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
183 spin_lock(&lock->spinlock); 183 spin_lock(&lock->spinlock);
184 /* if the master told us the lock was already granted, 184 /* if the master told us the lock was already granted,
185 * let the ast handle all of these actions */ 185 * let the ast handle all of these actions */
186 if (status == DLM_NORMAL && 186 if (status == DLM_CANCELGRANT) {
187 lksb->status == DLM_CANCELGRANT) {
188 actions &= ~(DLM_UNLOCK_REMOVE_LOCK| 187 actions &= ~(DLM_UNLOCK_REMOVE_LOCK|
189 DLM_UNLOCK_REGRANT_LOCK| 188 DLM_UNLOCK_REGRANT_LOCK|
190 DLM_UNLOCK_CLEAR_CONVERT_TYPE); 189 DLM_UNLOCK_CLEAR_CONVERT_TYPE);
@@ -349,14 +348,9 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
349 vec, veclen, owner, &status); 348 vec, veclen, owner, &status);
350 if (tmpret >= 0) { 349 if (tmpret >= 0) {
351 // successfully sent and received 350 // successfully sent and received
352 if (status == DLM_CANCELGRANT) 351 if (status == DLM_FORWARD)
353 ret = DLM_NORMAL;
354 else if (status == DLM_FORWARD) {
355 mlog(0, "master was in-progress. retry\n"); 352 mlog(0, "master was in-progress. retry\n");
356 ret = DLM_FORWARD; 353 ret = status;
357 } else
358 ret = status;
359 lksb->status = status;
360 } else { 354 } else {
361 mlog_errno(tmpret); 355 mlog_errno(tmpret);
362 if (dlm_is_host_down(tmpret)) { 356 if (dlm_is_host_down(tmpret)) {
@@ -372,7 +366,6 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
372 /* something bad. this will BUG in ocfs2 */ 366 /* something bad. this will BUG in ocfs2 */
373 ret = dlm_err_to_dlm_status(tmpret); 367 ret = dlm_err_to_dlm_status(tmpret);
374 } 368 }
375 lksb->status = ret;
376 } 369 }
377 370
378 return ret; 371 return ret;
@@ -483,6 +476,10 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data)
483 476
484 /* lock was found on queue */ 477 /* lock was found on queue */
485 lksb = lock->lksb; 478 lksb = lock->lksb;
479 if (flags & (LKM_VALBLK|LKM_PUT_LVB) &&
480 lock->ml.type != LKM_EXMODE)
481 flags &= ~(LKM_VALBLK|LKM_PUT_LVB);
482
486 /* unlockast only called on originating node */ 483 /* unlockast only called on originating node */
487 if (flags & LKM_PUT_LVB) { 484 if (flags & LKM_PUT_LVB) {
488 lksb->flags |= DLM_LKSB_PUT_LVB; 485 lksb->flags |= DLM_LKSB_PUT_LVB;
@@ -507,11 +504,8 @@ not_found:
507 "cookie=%u:%llu\n", 504 "cookie=%u:%llu\n",
508 dlm_get_lock_cookie_node(unlock->cookie), 505 dlm_get_lock_cookie_node(unlock->cookie),
509 dlm_get_lock_cookie_seq(unlock->cookie)); 506 dlm_get_lock_cookie_seq(unlock->cookie));
510 else { 507 else
511 /* send the lksb->status back to the other node */
512 status = lksb->status;
513 dlm_lock_put(lock); 508 dlm_lock_put(lock);
514 }
515 509
516leave: 510leave:
517 if (res) 511 if (res)
@@ -533,26 +527,22 @@ static enum dlm_status dlm_get_cancel_actions(struct dlm_ctxt *dlm,
533 527
534 if (dlm_lock_on_list(&res->blocked, lock)) { 528 if (dlm_lock_on_list(&res->blocked, lock)) {
535 /* cancel this outright */ 529 /* cancel this outright */
536 lksb->status = DLM_NORMAL;
537 status = DLM_NORMAL; 530 status = DLM_NORMAL;
538 *actions = (DLM_UNLOCK_CALL_AST | 531 *actions = (DLM_UNLOCK_CALL_AST |
539 DLM_UNLOCK_REMOVE_LOCK); 532 DLM_UNLOCK_REMOVE_LOCK);
540 } else if (dlm_lock_on_list(&res->converting, lock)) { 533 } else if (dlm_lock_on_list(&res->converting, lock)) {
541 /* cancel the request, put back on granted */ 534 /* cancel the request, put back on granted */
542 lksb->status = DLM_NORMAL;
543 status = DLM_NORMAL; 535 status = DLM_NORMAL;
544 *actions = (DLM_UNLOCK_CALL_AST | 536 *actions = (DLM_UNLOCK_CALL_AST |
545 DLM_UNLOCK_REMOVE_LOCK | 537 DLM_UNLOCK_REMOVE_LOCK |
546 DLM_UNLOCK_REGRANT_LOCK | 538 DLM_UNLOCK_REGRANT_LOCK |
547 DLM_UNLOCK_CLEAR_CONVERT_TYPE); 539 DLM_UNLOCK_CLEAR_CONVERT_TYPE);
548 } else if (dlm_lock_on_list(&res->granted, lock)) { 540 } else if (dlm_lock_on_list(&res->granted, lock)) {
549 /* too late, already granted. DLM_CANCELGRANT */ 541 /* too late, already granted. */
550 lksb->status = DLM_CANCELGRANT; 542 status = DLM_CANCELGRANT;
551 status = DLM_NORMAL;
552 *actions = DLM_UNLOCK_CALL_AST; 543 *actions = DLM_UNLOCK_CALL_AST;
553 } else { 544 } else {
554 mlog(ML_ERROR, "lock to cancel is not on any list!\n"); 545 mlog(ML_ERROR, "lock to cancel is not on any list!\n");
555 lksb->status = DLM_IVLOCKID;
556 status = DLM_IVLOCKID; 546 status = DLM_IVLOCKID;
557 *actions = 0; 547 *actions = 0;
558 } 548 }
@@ -569,13 +559,11 @@ static enum dlm_status dlm_get_unlock_actions(struct dlm_ctxt *dlm,
569 559
570 /* unlock request */ 560 /* unlock request */
571 if (!dlm_lock_on_list(&res->granted, lock)) { 561 if (!dlm_lock_on_list(&res->granted, lock)) {
572 lksb->status = DLM_DENIED;
573 status = DLM_DENIED; 562 status = DLM_DENIED;
574 dlm_error(status); 563 dlm_error(status);
575 *actions = 0; 564 *actions = 0;
576 } else { 565 } else {
577 /* unlock granted lock */ 566 /* unlock granted lock */
578 lksb->status = DLM_NORMAL;
579 status = DLM_NORMAL; 567 status = DLM_NORMAL;
580 *actions = (DLM_UNLOCK_FREE_LOCK | 568 *actions = (DLM_UNLOCK_FREE_LOCK |
581 DLM_UNLOCK_CALL_AST | 569 DLM_UNLOCK_CALL_AST |
@@ -632,6 +620,8 @@ retry:
632 620
633 spin_lock(&res->spinlock); 621 spin_lock(&res->spinlock);
634 is_master = (res->owner == dlm->node_num); 622 is_master = (res->owner == dlm->node_num);
623 if (flags & LKM_VALBLK && lock->ml.type != LKM_EXMODE)
624 flags &= ~LKM_VALBLK;
635 spin_unlock(&res->spinlock); 625 spin_unlock(&res->spinlock);
636 626
637 if (is_master) { 627 if (is_master) {
@@ -665,7 +655,7 @@ retry:
665 } 655 }
666 656
667 if (call_ast) { 657 if (call_ast) {
668 mlog(0, "calling unlockast(%p, %d)\n", data, lksb->status); 658 mlog(0, "calling unlockast(%p, %d)\n", data, status);
669 if (is_master) { 659 if (is_master) {
670 /* it is possible that there is one last bast 660 /* it is possible that there is one last bast
671 * pending. make sure it is flushed, then 661 * pending. make sure it is flushed, then
@@ -677,9 +667,12 @@ retry:
677 wait_event(dlm->ast_wq, 667 wait_event(dlm->ast_wq,
678 dlm_lock_basts_flushed(dlm, lock)); 668 dlm_lock_basts_flushed(dlm, lock));
679 } 669 }
680 (*unlockast)(data, lksb->status); 670 (*unlockast)(data, status);
681 } 671 }
682 672
673 if (status == DLM_CANCELGRANT)
674 status = DLM_NORMAL;
675
683 if (status == DLM_NORMAL) { 676 if (status == DLM_NORMAL) {
684 mlog(0, "kicking the thread\n"); 677 mlog(0, "kicking the thread\n");
685 dlm_kick_thread(dlm, res); 678 dlm_kick_thread(dlm, res);
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 0d1973ea32b0..1f17a4d08287 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -840,6 +840,12 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
840 840
841 mlog(0, "Allocating %u clusters for a new window.\n", 841 mlog(0, "Allocating %u clusters for a new window.\n",
842 ocfs2_local_alloc_window_bits(osb)); 842 ocfs2_local_alloc_window_bits(osb));
843
844 /* Instruct the allocation code to try the most recently used
845 * cluster group. We'll re-record the group used this pass
846 * below. */
847 ac->ac_last_group = osb->la_last_gd;
848
843 /* we used the generic suballoc reserve function, but we set 849 /* we used the generic suballoc reserve function, but we set
844 * everything up nicely, so there's no reason why we can't use 850 * everything up nicely, so there's no reason why we can't use
845 * the more specific cluster api to claim bits. */ 851 * the more specific cluster api to claim bits. */
@@ -852,6 +858,8 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
852 goto bail; 858 goto bail;
853 } 859 }
854 860
861 osb->la_last_gd = ac->ac_last_group;
862
855 la->la_bm_off = cpu_to_le32(cluster_off); 863 la->la_bm_off = cpu_to_le32(cluster_off);
856 alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count); 864 alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count);
857 /* just in case... In the future when we find space ourselves, 865 /* just in case... In the future when we find space ourselves,
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index cd4a6f253d13..0462a7f4e21b 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -197,7 +197,6 @@ struct ocfs2_super
197 struct ocfs2_node_map recovery_map; 197 struct ocfs2_node_map recovery_map;
198 struct ocfs2_node_map umount_map; 198 struct ocfs2_node_map umount_map;
199 199
200 u32 num_clusters;
201 u64 root_blkno; 200 u64 root_blkno;
202 u64 system_dir_blkno; 201 u64 system_dir_blkno;
203 u64 bitmap_blkno; 202 u64 bitmap_blkno;
@@ -237,6 +236,7 @@ struct ocfs2_super
237 236
238 enum ocfs2_local_alloc_state local_alloc_state; 237 enum ocfs2_local_alloc_state local_alloc_state;
239 struct buffer_head *local_alloc_bh; 238 struct buffer_head *local_alloc_bh;
239 u64 la_last_gd;
240 240
241 /* Next two fields are for local node slot recovery during 241 /* Next two fields are for local node slot recovery during
242 * mount. */ 242 * mount. */
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 195523090c87..9d91e66f51a9 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -70,12 +70,6 @@ static int ocfs2_block_group_search(struct inode *inode,
70 struct buffer_head *group_bh, 70 struct buffer_head *group_bh,
71 u32 bits_wanted, u32 min_bits, 71 u32 bits_wanted, u32 min_bits,
72 u16 *bit_off, u16 *bits_found); 72 u16 *bit_off, u16 *bits_found);
73static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
74 u32 bits_wanted,
75 u32 min_bits,
76 u16 *bit_off,
77 unsigned int *num_bits,
78 u64 *bg_blkno);
79static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, 73static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
80 struct ocfs2_alloc_context *ac, 74 struct ocfs2_alloc_context *ac,
81 u32 bits_wanted, 75 u32 bits_wanted,
@@ -85,11 +79,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
85 u64 *bg_blkno); 79 u64 *bg_blkno);
86static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, 80static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
87 int nr); 81 int nr);
88static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
89 struct buffer_head *bg_bh,
90 unsigned int bits_wanted,
91 u16 *bit_off,
92 u16 *bits_found);
93static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle, 82static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle,
94 struct inode *alloc_inode, 83 struct inode *alloc_inode,
95 struct ocfs2_group_desc *bg, 84 struct ocfs2_group_desc *bg,
@@ -143,6 +132,64 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
143 return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc); 132 return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc);
144} 133}
145 134
135/* somewhat more expensive than our other checks, so use sparingly. */
136static int ocfs2_check_group_descriptor(struct super_block *sb,
137 struct ocfs2_dinode *di,
138 struct ocfs2_group_desc *gd)
139{
140 unsigned int max_bits;
141
142 if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
143 OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd);
144 return -EIO;
145 }
146
147 if (di->i_blkno != gd->bg_parent_dinode) {
148 ocfs2_error(sb, "Group descriptor # %llu has bad parent "
149 "pointer (%llu, expected %llu)",
150 (unsigned long long)le64_to_cpu(gd->bg_blkno),
151 (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
152 (unsigned long long)le64_to_cpu(di->i_blkno));
153 return -EIO;
154 }
155
156 max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc);
157 if (le16_to_cpu(gd->bg_bits) > max_bits) {
158 ocfs2_error(sb, "Group descriptor # %llu has bit count of %u",
159 (unsigned long long)le64_to_cpu(gd->bg_blkno),
160 le16_to_cpu(gd->bg_bits));
161 return -EIO;
162 }
163
164 if (le16_to_cpu(gd->bg_chain) >=
165 le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) {
166 ocfs2_error(sb, "Group descriptor # %llu has bad chain %u",
167 (unsigned long long)le64_to_cpu(gd->bg_blkno),
168 le16_to_cpu(gd->bg_chain));
169 return -EIO;
170 }
171
172 if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
173 ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
174 "claims that %u are free",
175 (unsigned long long)le64_to_cpu(gd->bg_blkno),
176 le16_to_cpu(gd->bg_bits),
177 le16_to_cpu(gd->bg_free_bits_count));
178 return -EIO;
179 }
180
181 if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
182 ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
183 "max bitmap bits of %u",
184 (unsigned long long)le64_to_cpu(gd->bg_blkno),
185 le16_to_cpu(gd->bg_bits),
186 8 * le16_to_cpu(gd->bg_size));
187 return -EIO;
188 }
189
190 return 0;
191}
192
146static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle, 193static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle,
147 struct inode *alloc_inode, 194 struct inode *alloc_inode,
148 struct buffer_head *bg_bh, 195 struct buffer_head *bg_bh,
@@ -663,6 +710,7 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
663static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, 710static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
664 struct buffer_head *bg_bh, 711 struct buffer_head *bg_bh,
665 unsigned int bits_wanted, 712 unsigned int bits_wanted,
713 unsigned int total_bits,
666 u16 *bit_off, 714 u16 *bit_off,
667 u16 *bits_found) 715 u16 *bits_found)
668{ 716{
@@ -679,10 +727,8 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
679 found = start = best_offset = best_size = 0; 727 found = start = best_offset = best_size = 0;
680 bitmap = bg->bg_bitmap; 728 bitmap = bg->bg_bitmap;
681 729
682 while((offset = ocfs2_find_next_zero_bit(bitmap, 730 while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) {
683 le16_to_cpu(bg->bg_bits), 731 if (offset == total_bits)
684 start)) != -1) {
685 if (offset == le16_to_cpu(bg->bg_bits))
686 break; 732 break;
687 733
688 if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) { 734 if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) {
@@ -911,14 +957,35 @@ static int ocfs2_cluster_group_search(struct inode *inode,
911{ 957{
912 int search = -ENOSPC; 958 int search = -ENOSPC;
913 int ret; 959 int ret;
914 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; 960 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
915 u16 tmp_off, tmp_found; 961 u16 tmp_off, tmp_found;
962 unsigned int max_bits, gd_cluster_off;
916 963
917 BUG_ON(!ocfs2_is_cluster_bitmap(inode)); 964 BUG_ON(!ocfs2_is_cluster_bitmap(inode));
918 965
919 if (bg->bg_free_bits_count) { 966 if (gd->bg_free_bits_count) {
967 max_bits = le16_to_cpu(gd->bg_bits);
968
969 /* Tail groups in cluster bitmaps which aren't cpg
970 * aligned are prone to partial extension by a failed
971 * fs resize. If the file system resize never got to
972 * update the dinode cluster count, then we don't want
973 * to trust any clusters past it, regardless of what
974 * the group descriptor says. */
975 gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb,
976 le64_to_cpu(gd->bg_blkno));
977 if ((gd_cluster_off + max_bits) >
978 OCFS2_I(inode)->ip_clusters) {
979 max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off;
980 mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n",
981 (unsigned long long)le64_to_cpu(gd->bg_blkno),
982 le16_to_cpu(gd->bg_bits),
983 OCFS2_I(inode)->ip_clusters, max_bits);
984 }
985
920 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), 986 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
921 group_bh, bits_wanted, 987 group_bh, bits_wanted,
988 max_bits,
922 &tmp_off, &tmp_found); 989 &tmp_off, &tmp_found);
923 if (ret) 990 if (ret)
924 return ret; 991 return ret;
@@ -951,17 +1018,109 @@ static int ocfs2_block_group_search(struct inode *inode,
951 if (bg->bg_free_bits_count) 1018 if (bg->bg_free_bits_count)
952 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), 1019 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
953 group_bh, bits_wanted, 1020 group_bh, bits_wanted,
1021 le16_to_cpu(bg->bg_bits),
954 bit_off, bits_found); 1022 bit_off, bits_found);
955 1023
956 return ret; 1024 return ret;
957} 1025}
958 1026
1027static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
1028 struct ocfs2_journal_handle *handle,
1029 struct buffer_head *di_bh,
1030 u32 num_bits,
1031 u16 chain)
1032{
1033 int ret;
1034 u32 tmp_used;
1035 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
1036 struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain;
1037
1038 ret = ocfs2_journal_access(handle, inode, di_bh,
1039 OCFS2_JOURNAL_ACCESS_WRITE);
1040 if (ret < 0) {
1041 mlog_errno(ret);
1042 goto out;
1043 }
1044
1045 tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
1046 di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used);
1047 le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits);
1048
1049 ret = ocfs2_journal_dirty(handle, di_bh);
1050 if (ret < 0)
1051 mlog_errno(ret);
1052
1053out:
1054 return ret;
1055}
1056
1057static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1058 u32 bits_wanted,
1059 u32 min_bits,
1060 u16 *bit_off,
1061 unsigned int *num_bits,
1062 u64 gd_blkno,
1063 u16 *bits_left)
1064{
1065 int ret;
1066 u16 found;
1067 struct buffer_head *group_bh = NULL;
1068 struct ocfs2_group_desc *gd;
1069 struct inode *alloc_inode = ac->ac_inode;
1070 struct ocfs2_journal_handle *handle = ac->ac_handle;
1071
1072 ret = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno,
1073 &group_bh, OCFS2_BH_CACHED, alloc_inode);
1074 if (ret < 0) {
1075 mlog_errno(ret);
1076 return ret;
1077 }
1078
1079 gd = (struct ocfs2_group_desc *) group_bh->b_data;
1080 if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
1081 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd);
1082 ret = -EIO;
1083 goto out;
1084 }
1085
1086 ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
1087 bit_off, &found);
1088 if (ret < 0) {
1089 if (ret != -ENOSPC)
1090 mlog_errno(ret);
1091 goto out;
1092 }
1093
1094 *num_bits = found;
1095
1096 ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh,
1097 *num_bits,
1098 le16_to_cpu(gd->bg_chain));
1099 if (ret < 0) {
1100 mlog_errno(ret);
1101 goto out;
1102 }
1103
1104 ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh,
1105 *bit_off, *num_bits);
1106 if (ret < 0)
1107 mlog_errno(ret);
1108
1109 *bits_left = le16_to_cpu(gd->bg_free_bits_count);
1110
1111out:
1112 brelse(group_bh);
1113
1114 return ret;
1115}
1116
959static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, 1117static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
960 u32 bits_wanted, 1118 u32 bits_wanted,
961 u32 min_bits, 1119 u32 min_bits,
962 u16 *bit_off, 1120 u16 *bit_off,
963 unsigned int *num_bits, 1121 unsigned int *num_bits,
964 u64 *bg_blkno) 1122 u64 *bg_blkno,
1123 u16 *bits_left)
965{ 1124{
966 int status; 1125 int status;
967 u16 chain, tmp_bits; 1126 u16 chain, tmp_bits;
@@ -988,9 +1147,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
988 goto bail; 1147 goto bail;
989 } 1148 }
990 bg = (struct ocfs2_group_desc *) group_bh->b_data; 1149 bg = (struct ocfs2_group_desc *) group_bh->b_data;
991 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 1150 status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
992 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); 1151 if (status) {
993 status = -EIO; 1152 mlog_errno(status);
994 goto bail; 1153 goto bail;
995 } 1154 }
996 1155
@@ -1018,9 +1177,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1018 goto bail; 1177 goto bail;
1019 } 1178 }
1020 bg = (struct ocfs2_group_desc *) group_bh->b_data; 1179 bg = (struct ocfs2_group_desc *) group_bh->b_data;
1021 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 1180 status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
1022 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); 1181 if (status) {
1023 status = -EIO; 1182 mlog_errno(status);
1024 goto bail; 1183 goto bail;
1025 } 1184 }
1026 } 1185 }
@@ -1099,6 +1258,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1099 (unsigned long long)fe->i_blkno); 1258 (unsigned long long)fe->i_blkno);
1100 1259
1101 *bg_blkno = le64_to_cpu(bg->bg_blkno); 1260 *bg_blkno = le64_to_cpu(bg->bg_blkno);
1261 *bits_left = le16_to_cpu(bg->bg_free_bits_count);
1102bail: 1262bail:
1103 if (group_bh) 1263 if (group_bh)
1104 brelse(group_bh); 1264 brelse(group_bh);
@@ -1120,6 +1280,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1120{ 1280{
1121 int status; 1281 int status;
1122 u16 victim, i; 1282 u16 victim, i;
1283 u16 bits_left = 0;
1284 u64 hint_blkno = ac->ac_last_group;
1123 struct ocfs2_chain_list *cl; 1285 struct ocfs2_chain_list *cl;
1124 struct ocfs2_dinode *fe; 1286 struct ocfs2_dinode *fe;
1125 1287
@@ -1146,6 +1308,28 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1146 goto bail; 1308 goto bail;
1147 } 1309 }
1148 1310
1311 if (hint_blkno) {
1312 /* Attempt to short-circuit the usual search mechanism
1313 * by jumping straight to the most recently used
1314 * allocation group. This helps us maintain some
1315 * contiguousness across allocations. */
1316 status = ocfs2_search_one_group(ac, bits_wanted, min_bits,
1317 bit_off, num_bits,
1318 hint_blkno, &bits_left);
1319 if (!status) {
1320 /* Be careful to update *bg_blkno here as the
1321 * caller is expecting it to be filled in, and
1322 * ocfs2_search_one_group() won't do that for
1323 * us. */
1324 *bg_blkno = hint_blkno;
1325 goto set_hint;
1326 }
1327 if (status < 0 && status != -ENOSPC) {
1328 mlog_errno(status);
1329 goto bail;
1330 }
1331 }
1332
1149 cl = (struct ocfs2_chain_list *) &fe->id2.i_chain; 1333 cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;
1150 1334
1151 victim = ocfs2_find_victim_chain(cl); 1335 victim = ocfs2_find_victim_chain(cl);
@@ -1153,9 +1337,9 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1153 ac->ac_allow_chain_relink = 1; 1337 ac->ac_allow_chain_relink = 1;
1154 1338
1155 status = ocfs2_search_chain(ac, bits_wanted, min_bits, bit_off, 1339 status = ocfs2_search_chain(ac, bits_wanted, min_bits, bit_off,
1156 num_bits, bg_blkno); 1340 num_bits, bg_blkno, &bits_left);
1157 if (!status) 1341 if (!status)
1158 goto bail; 1342 goto set_hint;
1159 if (status < 0 && status != -ENOSPC) { 1343 if (status < 0 && status != -ENOSPC) {
1160 mlog_errno(status); 1344 mlog_errno(status);
1161 goto bail; 1345 goto bail;
@@ -1177,8 +1361,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1177 1361
1178 ac->ac_chain = i; 1362 ac->ac_chain = i;
1179 status = ocfs2_search_chain(ac, bits_wanted, min_bits, 1363 status = ocfs2_search_chain(ac, bits_wanted, min_bits,
1180 bit_off, num_bits, 1364 bit_off, num_bits, bg_blkno,
1181 bg_blkno); 1365 &bits_left);
1182 if (!status) 1366 if (!status)
1183 break; 1367 break;
1184 if (status < 0 && status != -ENOSPC) { 1368 if (status < 0 && status != -ENOSPC) {
@@ -1186,8 +1370,19 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1186 goto bail; 1370 goto bail;
1187 } 1371 }
1188 } 1372 }
1189bail:
1190 1373
1374set_hint:
1375 if (status != -ENOSPC) {
1376 /* If the next search of this group is not likely to
1377 * yield a suitable extent, then we reset the last
1378 * group hint so as to not waste a disk read */
1379 if (bits_left < min_bits)
1380 ac->ac_last_group = 0;
1381 else
1382 ac->ac_last_group = *bg_blkno;
1383 }
1384
1385bail:
1191 mlog_exit(status); 1386 mlog_exit(status);
1192 return status; 1387 return status;
1193} 1388}
@@ -1341,7 +1536,7 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
1341{ 1536{
1342 int status; 1537 int status;
1343 unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given; 1538 unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
1344 u64 bg_blkno; 1539 u64 bg_blkno = 0;
1345 u16 bg_bit_off; 1540 u16 bg_bit_off;
1346 1541
1347 mlog_entry_void(); 1542 mlog_entry_void();
@@ -1494,9 +1689,9 @@ static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle,
1494 } 1689 }
1495 1690
1496 group = (struct ocfs2_group_desc *) group_bh->b_data; 1691 group = (struct ocfs2_group_desc *) group_bh->b_data;
1497 if (!OCFS2_IS_VALID_GROUP_DESC(group)) { 1692 status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group);
1498 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, group); 1693 if (status) {
1499 status = -EIO; 1694 mlog_errno(status);
1500 goto bail; 1695 goto bail;
1501 } 1696 }
1502 BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits)); 1697 BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index a76c82a7ceac..c787838d1052 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -49,6 +49,8 @@ struct ocfs2_alloc_context {
49 u16 ac_chain; 49 u16 ac_chain;
50 int ac_allow_chain_relink; 50 int ac_allow_chain_relink;
51 group_search_t *ac_group_search; 51 group_search_t *ac_group_search;
52
53 u64 ac_last_group;
52}; 54};
53 55
54void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac); 56void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 382706a67ffd..d17e33e66a1e 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1442,8 +1442,13 @@ static int ocfs2_initialize_super(struct super_block *sb,
1442 1442
1443 osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno; 1443 osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno;
1444 1444
1445 /* We don't have a cluster lock on the bitmap here because
1446 * we're only interested in static information and the extra
1447 * complexity at mount time isn't worth it. Don't pass the
1448 * inode in to the read function though as we don't want it to
1449 * be put in the cache. */
1445 status = ocfs2_read_block(osb, osb->bitmap_blkno, &bitmap_bh, 0, 1450 status = ocfs2_read_block(osb, osb->bitmap_blkno, &bitmap_bh, 0,
1446 inode); 1451 NULL);
1447 iput(inode); 1452 iput(inode);
1448 if (status < 0) { 1453 if (status < 0) {
1449 mlog_errno(status); 1454 mlog_errno(status);
@@ -1452,7 +1457,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
1452 1457
1453 di = (struct ocfs2_dinode *) bitmap_bh->b_data; 1458 di = (struct ocfs2_dinode *) bitmap_bh->b_data;
1454 osb->bitmap_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg); 1459 osb->bitmap_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg);
1455 osb->num_clusters = le32_to_cpu(di->id1.bitmap1.i_total);
1456 brelse(bitmap_bh); 1460 brelse(bitmap_bh);
1457 mlog(0, "cluster bitmap inode: %llu, clusters per group: %u\n", 1461 mlog(0, "cluster bitmap inode: %llu, clusters per group: %u\n",
1458 (unsigned long long)osb->bitmap_blkno, osb->bitmap_cpg); 1462 (unsigned long long)osb->bitmap_blkno, osb->bitmap_cpg);