aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/localalloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/localalloc.c')
-rw-r--r--fs/ocfs2/localalloc.c275
1 files changed, 192 insertions, 83 deletions
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index c983715d8d8c..3d7419682dc0 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -52,7 +52,8 @@ static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);
52 52
53static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 53static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
54 struct ocfs2_dinode *alloc, 54 struct ocfs2_dinode *alloc,
55 u32 numbits); 55 u32 *numbits,
56 struct ocfs2_alloc_reservation *resv);
56 57
57static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc); 58static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc);
58 59
@@ -74,6 +75,144 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
74static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, 75static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
75 struct inode *local_alloc_inode); 76 struct inode *local_alloc_inode);
76 77
78/*
79 * ocfs2_la_default_mb() - determine a default size, in megabytes of
80 * the local alloc.
81 *
82 * Generally, we'd like to pick as large a local alloc as
83 * possible. Performance on large workloads tends to scale
84 * proportionally to la size. In addition to that, the reservations
85 * code functions more efficiently as it can reserve more windows for
86 * write.
87 *
88 * Some things work against us when trying to choose a large local alloc:
89 *
90 * - We need to ensure our sizing is picked to leave enough space in
91 * group descriptors for other allocations (such as block groups,
92 * etc). Picking default sizes which are a multiple of 4 could help
93 * - block groups are allocated in 2mb and 4mb chunks.
94 *
95 * - Likewise, we don't want to starve other nodes of bits on small
96 * file systems. This can easily be taken care of by limiting our
97 * default to a reasonable size (256M) on larger cluster sizes.
98 *
99 * - Some file systems can't support very large sizes - 4k and 8k in
100 * particular are limited to less than 128 and 256 megabytes respectively.
101 *
102 * The following reference table shows group descriptor and local
103 * alloc maximums at various cluster sizes (4k blocksize)
104 *
105 * csize: 4K group: 126M la: 121M
106 * csize: 8K group: 252M la: 243M
107 * csize: 16K group: 504M la: 486M
108 * csize: 32K group: 1008M la: 972M
109 * csize: 64K group: 2016M la: 1944M
110 * csize: 128K group: 4032M la: 3888M
111 * csize: 256K group: 8064M la: 7776M
112 * csize: 512K group: 16128M la: 15552M
113 * csize: 1024K group: 32256M la: 31104M
114 */
115#define OCFS2_LA_MAX_DEFAULT_MB 256
116#define OCFS2_LA_OLD_DEFAULT 8
117unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
118{
119 unsigned int la_mb;
120 unsigned int gd_mb;
121 unsigned int megs_per_slot;
122 struct super_block *sb = osb->sb;
123
124 gd_mb = ocfs2_clusters_to_megabytes(osb->sb,
125 8 * ocfs2_group_bitmap_size(sb, 0, osb->s_feature_incompat));
126
127 /*
128 * This takes care of files systems with very small group
129 * descriptors - 512 byte blocksize at cluster sizes lower
130 * than 16K and also 1k blocksize with 4k cluster size.
131 */
132 if ((sb->s_blocksize == 512 && osb->s_clustersize <= 8192)
133 || (sb->s_blocksize == 1024 && osb->s_clustersize == 4096))
134 return OCFS2_LA_OLD_DEFAULT;
135
136 /*
137 * Leave enough room for some block groups and make the final
138 * value we work from a multiple of 4.
139 */
140 gd_mb -= 16;
141 gd_mb &= 0xFFFFFFFB;
142
143 la_mb = gd_mb;
144
145 /*
146 * Keep window sizes down to a reasonable default
147 */
148 if (la_mb > OCFS2_LA_MAX_DEFAULT_MB) {
149 /*
150 * Some clustersize / blocksize combinations will have
151 * given us a larger than OCFS2_LA_MAX_DEFAULT_MB
152 * default size, but get poor distribution when
153 * limited to exactly 256 megabytes.
154 *
155 * As an example, 16K clustersize at 4K blocksize
156 * gives us a cluster group size of 504M. Paring the
157 * local alloc size down to 256 however, would give us
158 * only one window and around 200MB left in the
159 * cluster group. Instead, find the first size below
160 * 256 which would give us an even distribution.
161 *
162 * Larger cluster group sizes actually work out pretty
163 * well when pared to 256, so we don't have to do this
164 * for any group that fits more than two
165 * OCFS2_LA_MAX_DEFAULT_MB windows.
166 */
167 if (gd_mb > (2 * OCFS2_LA_MAX_DEFAULT_MB))
168 la_mb = 256;
169 else {
170 unsigned int gd_mult = gd_mb;
171
172 while (gd_mult > 256)
173 gd_mult = gd_mult >> 1;
174
175 la_mb = gd_mult;
176 }
177 }
178
179 megs_per_slot = osb->osb_clusters_at_boot / osb->max_slots;
180 megs_per_slot = ocfs2_clusters_to_megabytes(osb->sb, megs_per_slot);
181 /* Too many nodes, too few disk clusters. */
182 if (megs_per_slot < la_mb)
183 la_mb = megs_per_slot;
184
185 return la_mb;
186}
187
188void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb)
189{
190 struct super_block *sb = osb->sb;
191 unsigned int la_default_mb = ocfs2_la_default_mb(osb);
192 unsigned int la_max_mb;
193
194 la_max_mb = ocfs2_clusters_to_megabytes(sb,
195 ocfs2_local_alloc_size(sb) * 8);
196
197 mlog(0, "requested: %dM, max: %uM, default: %uM\n",
198 requested_mb, la_max_mb, la_default_mb);
199
200 if (requested_mb == -1) {
201 /* No user request - use defaults */
202 osb->local_alloc_default_bits =
203 ocfs2_megabytes_to_clusters(sb, la_default_mb);
204 } else if (requested_mb > la_max_mb) {
205 /* Request is too big, we give the maximum available */
206 osb->local_alloc_default_bits =
207 ocfs2_megabytes_to_clusters(sb, la_max_mb);
208 } else {
209 osb->local_alloc_default_bits =
210 ocfs2_megabytes_to_clusters(sb, requested_mb);
211 }
212
213 osb->local_alloc_bits = osb->local_alloc_default_bits;
214}
215
77static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb) 216static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
78{ 217{
79 return (osb->local_alloc_state == OCFS2_LA_THROTTLED || 218 return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
@@ -156,7 +295,7 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
156 osb->local_alloc_bits, (osb->bitmap_cpg - 1)); 295 osb->local_alloc_bits, (osb->bitmap_cpg - 1));
157 osb->local_alloc_bits = 296 osb->local_alloc_bits =
158 ocfs2_megabytes_to_clusters(osb->sb, 297 ocfs2_megabytes_to_clusters(osb->sb,
159 OCFS2_DEFAULT_LOCAL_ALLOC_SIZE); 298 ocfs2_la_default_mb(osb));
160 } 299 }
161 300
162 /* read the alloc off disk */ 301 /* read the alloc off disk */
@@ -262,6 +401,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
262 401
263 osb->local_alloc_state = OCFS2_LA_DISABLED; 402 osb->local_alloc_state = OCFS2_LA_DISABLED;
264 403
404 ocfs2_resmap_uninit(&osb->osb_la_resmap);
405
265 main_bm_inode = ocfs2_get_system_file_inode(osb, 406 main_bm_inode = ocfs2_get_system_file_inode(osb,
266 GLOBAL_BITMAP_SYSTEM_INODE, 407 GLOBAL_BITMAP_SYSTEM_INODE,
267 OCFS2_INVALID_SLOT); 408 OCFS2_INVALID_SLOT);
@@ -305,12 +446,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
305 } 446 }
306 447
307 ocfs2_clear_local_alloc(alloc); 448 ocfs2_clear_local_alloc(alloc);
308 449 ocfs2_journal_dirty(handle, bh);
309 status = ocfs2_journal_dirty(handle, bh);
310 if (status < 0) {
311 mlog_errno(status);
312 goto out_commit;
313 }
314 450
315 brelse(bh); 451 brelse(bh);
316 osb->local_alloc_bh = NULL; 452 osb->local_alloc_bh = NULL;
@@ -481,46 +617,6 @@ out:
481 return status; 617 return status;
482} 618}
483 619
484/* Check to see if the local alloc window is within ac->ac_max_block */
485static int ocfs2_local_alloc_in_range(struct inode *inode,
486 struct ocfs2_alloc_context *ac,
487 u32 bits_wanted)
488{
489 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
490 struct ocfs2_dinode *alloc;
491 struct ocfs2_local_alloc *la;
492 int start;
493 u64 block_off;
494
495 if (!ac->ac_max_block)
496 return 1;
497
498 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
499 la = OCFS2_LOCAL_ALLOC(alloc);
500
501 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
502 if (start == -1) {
503 mlog_errno(-ENOSPC);
504 return 0;
505 }
506
507 /*
508 * Converting (bm_off + start + bits_wanted) to blocks gives us
509 * the blkno just past our actual allocation. This is perfect
510 * to compare with ac_max_block.
511 */
512 block_off = ocfs2_clusters_to_blocks(inode->i_sb,
513 le32_to_cpu(la->la_bm_off) +
514 start + bits_wanted);
515 mlog(0, "Checking %llu against %llu\n",
516 (unsigned long long)block_off,
517 (unsigned long long)ac->ac_max_block);
518 if (block_off > ac->ac_max_block)
519 return 0;
520
521 return 1;
522}
523
524/* 620/*
525 * make sure we've got at least bits_wanted contiguous bits in the 621 * make sure we've got at least bits_wanted contiguous bits in the
526 * local alloc. You lose them when you drop i_mutex. 622 * local alloc. You lose them when you drop i_mutex.
@@ -613,17 +709,6 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
613 mlog(0, "Calling in_range for max block %llu\n", 709 mlog(0, "Calling in_range for max block %llu\n",
614 (unsigned long long)ac->ac_max_block); 710 (unsigned long long)ac->ac_max_block);
615 711
616 if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac,
617 bits_wanted)) {
618 /*
619 * The window is outside ac->ac_max_block.
620 * This errno tells the caller to keep localalloc enabled
621 * but to get the allocation from the main bitmap.
622 */
623 status = -EFBIG;
624 goto bail;
625 }
626
627 ac->ac_inode = local_alloc_inode; 712 ac->ac_inode = local_alloc_inode;
628 /* We should never use localalloc from another slot */ 713 /* We should never use localalloc from another slot */
629 ac->ac_alloc_slot = osb->slot_num; 714 ac->ac_alloc_slot = osb->slot_num;
@@ -664,7 +749,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
664 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 749 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
665 la = OCFS2_LOCAL_ALLOC(alloc); 750 la = OCFS2_LOCAL_ALLOC(alloc);
666 751
667 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted); 752 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, &bits_wanted,
753 ac->ac_resv);
668 if (start == -1) { 754 if (start == -1) {
669 /* TODO: Shouldn't we just BUG here? */ 755 /* TODO: Shouldn't we just BUG here? */
670 status = -ENOSPC; 756 status = -ENOSPC;
@@ -674,8 +760,6 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
674 760
675 bitmap = la->la_bitmap; 761 bitmap = la->la_bitmap;
676 *bit_off = le32_to_cpu(la->la_bm_off) + start; 762 *bit_off = le32_to_cpu(la->la_bm_off) + start;
677 /* local alloc is always contiguous by nature -- we never
678 * delete bits from it! */
679 *num_bits = bits_wanted; 763 *num_bits = bits_wanted;
680 764
681 status = ocfs2_journal_access_di(handle, 765 status = ocfs2_journal_access_di(handle,
@@ -687,18 +771,15 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
687 goto bail; 771 goto bail;
688 } 772 }
689 773
774 ocfs2_resmap_claimed_bits(&osb->osb_la_resmap, ac->ac_resv, start,
775 bits_wanted);
776
690 while(bits_wanted--) 777 while(bits_wanted--)
691 ocfs2_set_bit(start++, bitmap); 778 ocfs2_set_bit(start++, bitmap);
692 779
693 le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits); 780 le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits);
781 ocfs2_journal_dirty(handle, osb->local_alloc_bh);
694 782
695 status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
696 if (status < 0) {
697 mlog_errno(status);
698 goto bail;
699 }
700
701 status = 0;
702bail: 783bail:
703 mlog_exit(status); 784 mlog_exit(status);
704 return status; 785 return status;
@@ -722,13 +803,17 @@ static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
722} 803}
723 804
724static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 805static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
725 struct ocfs2_dinode *alloc, 806 struct ocfs2_dinode *alloc,
726 u32 numbits) 807 u32 *numbits,
808 struct ocfs2_alloc_reservation *resv)
727{ 809{
728 int numfound, bitoff, left, startoff, lastzero; 810 int numfound, bitoff, left, startoff, lastzero;
811 int local_resv = 0;
812 struct ocfs2_alloc_reservation r;
729 void *bitmap = NULL; 813 void *bitmap = NULL;
814 struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap;
730 815
731 mlog_entry("(numbits wanted = %u)\n", numbits); 816 mlog_entry("(numbits wanted = %u)\n", *numbits);
732 817
733 if (!alloc->id1.bitmap1.i_total) { 818 if (!alloc->id1.bitmap1.i_total) {
734 mlog(0, "No bits in my window!\n"); 819 mlog(0, "No bits in my window!\n");
@@ -736,6 +821,30 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
736 goto bail; 821 goto bail;
737 } 822 }
738 823
824 if (!resv) {
825 local_resv = 1;
826 ocfs2_resv_init_once(&r);
827 ocfs2_resv_set_type(&r, OCFS2_RESV_FLAG_TMP);
828 resv = &r;
829 }
830
831 numfound = *numbits;
832 if (ocfs2_resmap_resv_bits(resmap, resv, &bitoff, &numfound) == 0) {
833 if (numfound < *numbits)
834 *numbits = numfound;
835 goto bail;
836 }
837
838 /*
839 * Code error. While reservations are enabled, local
840 * allocation should _always_ go through them.
841 */
842 BUG_ON(osb->osb_resv_level != 0);
843
844 /*
845 * Reservations are disabled. Handle this the old way.
846 */
847
739 bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap; 848 bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;
740 849
741 numfound = bitoff = startoff = 0; 850 numfound = bitoff = startoff = 0;
@@ -761,7 +870,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
761 startoff = bitoff+1; 870 startoff = bitoff+1;
762 } 871 }
763 /* we got everything we needed */ 872 /* we got everything we needed */
764 if (numfound == numbits) { 873 if (numfound == *numbits) {
765 /* mlog(0, "Found it all!\n"); */ 874 /* mlog(0, "Found it all!\n"); */
766 break; 875 break;
767 } 876 }
@@ -770,12 +879,15 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
770 mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff, 879 mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff,
771 numfound); 880 numfound);
772 881
773 if (numfound == numbits) 882 if (numfound == *numbits)
774 bitoff = startoff - numfound; 883 bitoff = startoff - numfound;
775 else 884 else
776 bitoff = -1; 885 bitoff = -1;
777 886
778bail: 887bail:
888 if (local_resv)
889 ocfs2_resv_discard(resmap, resv);
890
779 mlog_exit(bitoff); 891 mlog_exit(bitoff);
780 return bitoff; 892 return bitoff;
781} 893}
@@ -1049,7 +1161,7 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
1049 /* we used the generic suballoc reserve function, but we set 1161 /* we used the generic suballoc reserve function, but we set
1050 * everything up nicely, so there's no reason why we can't use 1162 * everything up nicely, so there's no reason why we can't use
1051 * the more specific cluster api to claim bits. */ 1163 * the more specific cluster api to claim bits. */
1052 status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits, 1164 status = ocfs2_claim_clusters(handle, ac, osb->local_alloc_bits,
1053 &cluster_off, &cluster_count); 1165 &cluster_off, &cluster_count);
1054 if (status == -ENOSPC) { 1166 if (status == -ENOSPC) {
1055retry_enospc: 1167retry_enospc:
@@ -1063,7 +1175,7 @@ retry_enospc:
1063 goto bail; 1175 goto bail;
1064 1176
1065 ac->ac_bits_wanted = osb->local_alloc_default_bits; 1177 ac->ac_bits_wanted = osb->local_alloc_default_bits;
1066 status = ocfs2_claim_clusters(osb, handle, ac, 1178 status = ocfs2_claim_clusters(handle, ac,
1067 osb->local_alloc_bits, 1179 osb->local_alloc_bits,
1068 &cluster_off, 1180 &cluster_off,
1069 &cluster_count); 1181 &cluster_count);
@@ -1098,6 +1210,9 @@ retry_enospc:
1098 memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0, 1210 memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0,
1099 le16_to_cpu(la->la_size)); 1211 le16_to_cpu(la->la_size));
1100 1212
1213 ocfs2_resmap_restart(&osb->osb_la_resmap, cluster_count,
1214 OCFS2_LOCAL_ALLOC(alloc)->la_bitmap);
1215
1101 mlog(0, "New window allocated:\n"); 1216 mlog(0, "New window allocated:\n");
1102 mlog(0, "window la_bm_off = %u\n", 1217 mlog(0, "window la_bm_off = %u\n",
1103 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); 1218 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
@@ -1169,12 +1284,7 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
1169 } 1284 }
1170 1285
1171 ocfs2_clear_local_alloc(alloc); 1286 ocfs2_clear_local_alloc(alloc);
1172 1287 ocfs2_journal_dirty(handle, osb->local_alloc_bh);
1173 status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
1174 if (status < 0) {
1175 mlog_errno(status);
1176 goto bail;
1177 }
1178 1288
1179 status = ocfs2_sync_local_to_main(osb, handle, alloc_copy, 1289 status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
1180 main_bm_inode, main_bm_bh); 1290 main_bm_inode, main_bm_bh);
@@ -1192,7 +1302,6 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
1192 1302
1193 atomic_inc(&osb->alloc_stats.moves); 1303 atomic_inc(&osb->alloc_stats.moves);
1194 1304
1195 status = 0;
1196bail: 1305bail:
1197 if (handle) 1306 if (handle)
1198 ocfs2_commit_trans(osb, handle); 1307 ocfs2_commit_trans(osb, handle);