aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/localalloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/localalloc.c')
-rw-r--r--fs/ocfs2/localalloc.c282
1 files changed, 199 insertions, 83 deletions
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index c983715d8d8c..ec6adbf8f551 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -52,7 +52,8 @@ static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);
52 52
53static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 53static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
54 struct ocfs2_dinode *alloc, 54 struct ocfs2_dinode *alloc,
55 u32 numbits); 55 u32 *numbits,
56 struct ocfs2_alloc_reservation *resv);
56 57
57static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc); 58static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc);
58 59
@@ -74,6 +75,151 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
74static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, 75static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
75 struct inode *local_alloc_inode); 76 struct inode *local_alloc_inode);
76 77
78/*
79 * ocfs2_la_default_mb() - determine a default size, in megabytes of
80 * the local alloc.
81 *
82 * Generally, we'd like to pick as large a local alloc as
83 * possible. Performance on large workloads tends to scale
84 * proportionally to la size. In addition to that, the reservations
85 * code functions more efficiently as it can reserve more windows for
86 * write.
87 *
88 * Some things work against us when trying to choose a large local alloc:
89 *
90 * - We need to ensure our sizing is picked to leave enough space in
91 * group descriptors for other allocations (such as block groups,
92 * etc). Picking default sizes which are a multiple of 4 could help
93 * - block groups are allocated in 2mb and 4mb chunks.
94 *
95 * - Likewise, we don't want to starve other nodes of bits on small
96 * file systems. This can easily be taken care of by limiting our
97 * default to a reasonable size (256M) on larger cluster sizes.
98 *
99 * - Some file systems can't support very large sizes - 4k and 8k in
100 * particular are limited to less than 128 and 256 megabytes respectively.
101 *
102 * The following reference table shows group descriptor and local
103 * alloc maximums at various cluster sizes (4k blocksize)
104 *
105 * csize: 4K group: 126M la: 121M
106 * csize: 8K group: 252M la: 243M
107 * csize: 16K group: 504M la: 486M
108 * csize: 32K group: 1008M la: 972M
109 * csize: 64K group: 2016M la: 1944M
110 * csize: 128K group: 4032M la: 3888M
111 * csize: 256K group: 8064M la: 7776M
112 * csize: 512K group: 16128M la: 15552M
113 * csize: 1024K group: 32256M la: 31104M
114 */
115#define OCFS2_LA_MAX_DEFAULT_MB 256
116#define OCFS2_LA_OLD_DEFAULT 8
117unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
118{
119 unsigned int la_mb;
120 unsigned int gd_mb;
121 unsigned int la_max_mb;
122 unsigned int megs_per_slot;
123 struct super_block *sb = osb->sb;
124
125 gd_mb = ocfs2_clusters_to_megabytes(osb->sb,
126 8 * ocfs2_group_bitmap_size(sb, 0, osb->s_feature_incompat));
127
128 /*
129 * This takes care of files systems with very small group
130 * descriptors - 512 byte blocksize at cluster sizes lower
131 * than 16K and also 1k blocksize with 4k cluster size.
132 */
133 if ((sb->s_blocksize == 512 && osb->s_clustersize <= 8192)
134 || (sb->s_blocksize == 1024 && osb->s_clustersize == 4096))
135 return OCFS2_LA_OLD_DEFAULT;
136
137 /*
138 * Leave enough room for some block groups and make the final
139 * value we work from a multiple of 4.
140 */
141 gd_mb -= 16;
142 gd_mb &= 0xFFFFFFFB;
143
144 la_mb = gd_mb;
145
146 /*
147 * Keep window sizes down to a reasonable default
148 */
149 if (la_mb > OCFS2_LA_MAX_DEFAULT_MB) {
150 /*
151 * Some clustersize / blocksize combinations will have
152 * given us a larger than OCFS2_LA_MAX_DEFAULT_MB
153 * default size, but get poor distribution when
154 * limited to exactly 256 megabytes.
155 *
156 * As an example, 16K clustersize at 4K blocksize
157 * gives us a cluster group size of 504M. Paring the
158 * local alloc size down to 256 however, would give us
159 * only one window and around 200MB left in the
160 * cluster group. Instead, find the first size below
161 * 256 which would give us an even distribution.
162 *
163 * Larger cluster group sizes actually work out pretty
164 * well when pared to 256, so we don't have to do this
165 * for any group that fits more than two
166 * OCFS2_LA_MAX_DEFAULT_MB windows.
167 */
168 if (gd_mb > (2 * OCFS2_LA_MAX_DEFAULT_MB))
169 la_mb = 256;
170 else {
171 unsigned int gd_mult = gd_mb;
172
173 while (gd_mult > 256)
174 gd_mult = gd_mult >> 1;
175
176 la_mb = gd_mult;
177 }
178 }
179
180 megs_per_slot = osb->osb_clusters_at_boot / osb->max_slots;
181 megs_per_slot = ocfs2_clusters_to_megabytes(osb->sb, megs_per_slot);
182 /* Too many nodes, too few disk clusters. */
183 if (megs_per_slot < la_mb)
184 la_mb = megs_per_slot;
185
186 /* We can't store more bits than we can in a block. */
187 la_max_mb = ocfs2_clusters_to_megabytes(osb->sb,
188 ocfs2_local_alloc_size(sb) * 8);
189 if (la_mb > la_max_mb)
190 la_mb = la_max_mb;
191
192 return la_mb;
193}
194
195void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb)
196{
197 struct super_block *sb = osb->sb;
198 unsigned int la_default_mb = ocfs2_la_default_mb(osb);
199 unsigned int la_max_mb;
200
201 la_max_mb = ocfs2_clusters_to_megabytes(sb,
202 ocfs2_local_alloc_size(sb) * 8);
203
204 mlog(0, "requested: %dM, max: %uM, default: %uM\n",
205 requested_mb, la_max_mb, la_default_mb);
206
207 if (requested_mb == -1) {
208 /* No user request - use defaults */
209 osb->local_alloc_default_bits =
210 ocfs2_megabytes_to_clusters(sb, la_default_mb);
211 } else if (requested_mb > la_max_mb) {
212 /* Request is too big, we give the maximum available */
213 osb->local_alloc_default_bits =
214 ocfs2_megabytes_to_clusters(sb, la_max_mb);
215 } else {
216 osb->local_alloc_default_bits =
217 ocfs2_megabytes_to_clusters(sb, requested_mb);
218 }
219
220 osb->local_alloc_bits = osb->local_alloc_default_bits;
221}
222
77static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb) 223static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
78{ 224{
79 return (osb->local_alloc_state == OCFS2_LA_THROTTLED || 225 return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
@@ -156,7 +302,7 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
156 osb->local_alloc_bits, (osb->bitmap_cpg - 1)); 302 osb->local_alloc_bits, (osb->bitmap_cpg - 1));
157 osb->local_alloc_bits = 303 osb->local_alloc_bits =
158 ocfs2_megabytes_to_clusters(osb->sb, 304 ocfs2_megabytes_to_clusters(osb->sb,
159 OCFS2_DEFAULT_LOCAL_ALLOC_SIZE); 305 ocfs2_la_default_mb(osb));
160 } 306 }
161 307
162 /* read the alloc off disk */ 308 /* read the alloc off disk */
@@ -262,6 +408,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
262 408
263 osb->local_alloc_state = OCFS2_LA_DISABLED; 409 osb->local_alloc_state = OCFS2_LA_DISABLED;
264 410
411 ocfs2_resmap_uninit(&osb->osb_la_resmap);
412
265 main_bm_inode = ocfs2_get_system_file_inode(osb, 413 main_bm_inode = ocfs2_get_system_file_inode(osb,
266 GLOBAL_BITMAP_SYSTEM_INODE, 414 GLOBAL_BITMAP_SYSTEM_INODE,
267 OCFS2_INVALID_SLOT); 415 OCFS2_INVALID_SLOT);
@@ -305,12 +453,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
305 } 453 }
306 454
307 ocfs2_clear_local_alloc(alloc); 455 ocfs2_clear_local_alloc(alloc);
308 456 ocfs2_journal_dirty(handle, bh);
309 status = ocfs2_journal_dirty(handle, bh);
310 if (status < 0) {
311 mlog_errno(status);
312 goto out_commit;
313 }
314 457
315 brelse(bh); 458 brelse(bh);
316 osb->local_alloc_bh = NULL; 459 osb->local_alloc_bh = NULL;
@@ -481,46 +624,6 @@ out:
481 return status; 624 return status;
482} 625}
483 626
484/* Check to see if the local alloc window is within ac->ac_max_block */
/*
 * ocfs2_local_alloc_in_range() - test whether a bits_wanted-sized run in
 * the current local alloc window ends at or below ac->ac_max_block.
 *
 * Returns 1 when ac->ac_max_block is zero (no limit is checked) or when
 * the block just past the run found in the window does not exceed
 * ac->ac_max_block. Returns 0 when no run of bits_wanted clear bits is
 * found (after logging -ENOSPC) or when the run ends beyond the limit.
 *
 * NOTE(review): this function carries only one column of line numbers in
 * the diff and uses the three-argument form of
 * ocfs2_local_alloc_find_clear_bits(), so it looks like the removed side
 * of the hunk - confirm before relying on it.
 */
485static int ocfs2_local_alloc_in_range(struct inode *inode,
486 struct ocfs2_alloc_context *ac,
487 u32 bits_wanted)
488{
489 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
490 struct ocfs2_dinode *alloc;
491 struct ocfs2_local_alloc *la;
492 int start;
493 u64 block_off;
494
495 if (!ac->ac_max_block)
496 return 1;
497
498 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
499 la = OCFS2_LOCAL_ALLOC(alloc);
500
501 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
502 if (start == -1) {
503 mlog_errno(-ENOSPC);
504 return 0;
505 }
506
507 /*
508 * Converting (bm_off + start + bits_wanted) to blocks gives us
509 * the blkno just past our actual allocation. This is perfect
510 * to compare with ac_max_block.
511 */
512 block_off = ocfs2_clusters_to_blocks(inode->i_sb,
513 le32_to_cpu(la->la_bm_off) +
514 start + bits_wanted);
515 mlog(0, "Checking %llu against %llu\n",
516 (unsigned long long)block_off,
517 (unsigned long long)ac->ac_max_block);
518 if (block_off > ac->ac_max_block)
519 return 0;
520
521 return 1;
522}
523
524/* 627/*
525 * make sure we've got at least bits_wanted contiguous bits in the 628 * make sure we've got at least bits_wanted contiguous bits in the
526 * local alloc. You lose them when you drop i_mutex. 629 * local alloc. You lose them when you drop i_mutex.
@@ -613,17 +716,6 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
613 mlog(0, "Calling in_range for max block %llu\n", 716 mlog(0, "Calling in_range for max block %llu\n",
614 (unsigned long long)ac->ac_max_block); 717 (unsigned long long)ac->ac_max_block);
615 718
616 if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac,
617 bits_wanted)) {
618 /*
619 * The window is outside ac->ac_max_block.
620 * This errno tells the caller to keep localalloc enabled
621 * but to get the allocation from the main bitmap.
622 */
623 status = -EFBIG;
624 goto bail;
625 }
626
627 ac->ac_inode = local_alloc_inode; 719 ac->ac_inode = local_alloc_inode;
628 /* We should never use localalloc from another slot */ 720 /* We should never use localalloc from another slot */
629 ac->ac_alloc_slot = osb->slot_num; 721 ac->ac_alloc_slot = osb->slot_num;
@@ -664,7 +756,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
664 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 756 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
665 la = OCFS2_LOCAL_ALLOC(alloc); 757 la = OCFS2_LOCAL_ALLOC(alloc);
666 758
667 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted); 759 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, &bits_wanted,
760 ac->ac_resv);
668 if (start == -1) { 761 if (start == -1) {
669 /* TODO: Shouldn't we just BUG here? */ 762 /* TODO: Shouldn't we just BUG here? */
670 status = -ENOSPC; 763 status = -ENOSPC;
@@ -674,8 +767,6 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
674 767
675 bitmap = la->la_bitmap; 768 bitmap = la->la_bitmap;
676 *bit_off = le32_to_cpu(la->la_bm_off) + start; 769 *bit_off = le32_to_cpu(la->la_bm_off) + start;
677 /* local alloc is always contiguous by nature -- we never
678 * delete bits from it! */
679 *num_bits = bits_wanted; 770 *num_bits = bits_wanted;
680 771
681 status = ocfs2_journal_access_di(handle, 772 status = ocfs2_journal_access_di(handle,
@@ -687,18 +778,15 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
687 goto bail; 778 goto bail;
688 } 779 }
689 780
781 ocfs2_resmap_claimed_bits(&osb->osb_la_resmap, ac->ac_resv, start,
782 bits_wanted);
783
690 while(bits_wanted--) 784 while(bits_wanted--)
691 ocfs2_set_bit(start++, bitmap); 785 ocfs2_set_bit(start++, bitmap);
692 786
693 le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits); 787 le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits);
788 ocfs2_journal_dirty(handle, osb->local_alloc_bh);
694 789
695 status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
696 if (status < 0) {
697 mlog_errno(status);
698 goto bail;
699 }
700
701 status = 0;
702bail: 790bail:
703 mlog_exit(status); 791 mlog_exit(status);
704 return status; 792 return status;
@@ -722,13 +810,17 @@ static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
722} 810}
723 811
724static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 812static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
725 struct ocfs2_dinode *alloc, 813 struct ocfs2_dinode *alloc,
726 u32 numbits) 814 u32 *numbits,
815 struct ocfs2_alloc_reservation *resv)
727{ 816{
728 int numfound, bitoff, left, startoff, lastzero; 817 int numfound, bitoff, left, startoff, lastzero;
818 int local_resv = 0;
819 struct ocfs2_alloc_reservation r;
729 void *bitmap = NULL; 820 void *bitmap = NULL;
821 struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap;
730 822
731 mlog_entry("(numbits wanted = %u)\n", numbits); 823 mlog_entry("(numbits wanted = %u)\n", *numbits);
732 824
733 if (!alloc->id1.bitmap1.i_total) { 825 if (!alloc->id1.bitmap1.i_total) {
734 mlog(0, "No bits in my window!\n"); 826 mlog(0, "No bits in my window!\n");
@@ -736,6 +828,30 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
736 goto bail; 828 goto bail;
737 } 829 }
738 830
831 if (!resv) {
832 local_resv = 1;
833 ocfs2_resv_init_once(&r);
834 ocfs2_resv_set_type(&r, OCFS2_RESV_FLAG_TMP);
835 resv = &r;
836 }
837
838 numfound = *numbits;
839 if (ocfs2_resmap_resv_bits(resmap, resv, &bitoff, &numfound) == 0) {
840 if (numfound < *numbits)
841 *numbits = numfound;
842 goto bail;
843 }
844
845 /*
846 * Code error. While reservations are enabled, local
847 * allocation should _always_ go through them.
848 */
849 BUG_ON(osb->osb_resv_level != 0);
850
851 /*
852 * Reservations are disabled. Handle this the old way.
853 */
854
739 bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap; 855 bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;
740 856
741 numfound = bitoff = startoff = 0; 857 numfound = bitoff = startoff = 0;
@@ -761,7 +877,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
761 startoff = bitoff+1; 877 startoff = bitoff+1;
762 } 878 }
763 /* we got everything we needed */ 879 /* we got everything we needed */
764 if (numfound == numbits) { 880 if (numfound == *numbits) {
765 /* mlog(0, "Found it all!\n"); */ 881 /* mlog(0, "Found it all!\n"); */
766 break; 882 break;
767 } 883 }
@@ -770,12 +886,15 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
770 mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff, 886 mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff,
771 numfound); 887 numfound);
772 888
773 if (numfound == numbits) 889 if (numfound == *numbits)
774 bitoff = startoff - numfound; 890 bitoff = startoff - numfound;
775 else 891 else
776 bitoff = -1; 892 bitoff = -1;
777 893
778bail: 894bail:
895 if (local_resv)
896 ocfs2_resv_discard(resmap, resv);
897
779 mlog_exit(bitoff); 898 mlog_exit(bitoff);
780 return bitoff; 899 return bitoff;
781} 900}
@@ -1049,7 +1168,7 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
1049 /* we used the generic suballoc reserve function, but we set 1168 /* we used the generic suballoc reserve function, but we set
1050 * everything up nicely, so there's no reason why we can't use 1169 * everything up nicely, so there's no reason why we can't use
1051 * the more specific cluster api to claim bits. */ 1170 * the more specific cluster api to claim bits. */
1052 status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits, 1171 status = ocfs2_claim_clusters(handle, ac, osb->local_alloc_bits,
1053 &cluster_off, &cluster_count); 1172 &cluster_off, &cluster_count);
1054 if (status == -ENOSPC) { 1173 if (status == -ENOSPC) {
1055retry_enospc: 1174retry_enospc:
@@ -1063,7 +1182,7 @@ retry_enospc:
1063 goto bail; 1182 goto bail;
1064 1183
1065 ac->ac_bits_wanted = osb->local_alloc_default_bits; 1184 ac->ac_bits_wanted = osb->local_alloc_default_bits;
1066 status = ocfs2_claim_clusters(osb, handle, ac, 1185 status = ocfs2_claim_clusters(handle, ac,
1067 osb->local_alloc_bits, 1186 osb->local_alloc_bits,
1068 &cluster_off, 1187 &cluster_off,
1069 &cluster_count); 1188 &cluster_count);
@@ -1098,6 +1217,9 @@ retry_enospc:
1098 memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0, 1217 memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0,
1099 le16_to_cpu(la->la_size)); 1218 le16_to_cpu(la->la_size));
1100 1219
1220 ocfs2_resmap_restart(&osb->osb_la_resmap, cluster_count,
1221 OCFS2_LOCAL_ALLOC(alloc)->la_bitmap);
1222
1101 mlog(0, "New window allocated:\n"); 1223 mlog(0, "New window allocated:\n");
1102 mlog(0, "window la_bm_off = %u\n", 1224 mlog(0, "window la_bm_off = %u\n",
1103 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); 1225 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
@@ -1169,12 +1291,7 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
1169 } 1291 }
1170 1292
1171 ocfs2_clear_local_alloc(alloc); 1293 ocfs2_clear_local_alloc(alloc);
1172 1294 ocfs2_journal_dirty(handle, osb->local_alloc_bh);
1173 status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
1174 if (status < 0) {
1175 mlog_errno(status);
1176 goto bail;
1177 }
1178 1295
1179 status = ocfs2_sync_local_to_main(osb, handle, alloc_copy, 1296 status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
1180 main_bm_inode, main_bm_bh); 1297 main_bm_inode, main_bm_bh);
@@ -1192,7 +1309,6 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
1192 1309
1193 atomic_inc(&osb->alloc_stats.moves); 1310 atomic_inc(&osb->alloc_stats.moves);
1194 1311
1195 status = 0;
1196bail: 1312bail:
1197 if (handle) 1313 if (handle)
1198 ocfs2_commit_trans(osb, handle); 1314 ocfs2_commit_trans(osb, handle);