| author | Mark Fasheh <mfasheh@suse.com> | 2010-04-05 21:17:14 -0400 |
| --- | --- | --- |
| committer | Joel Becker <joel.becker@oracle.com> | 2010-05-05 21:18:07 -0400 |
| commit | 6b82021b9e91cd689fdffadbcdb9a42597bbe764 | |
| tree | ac4235e792e74a2e60a41e95d62965b7ed4b3232 /fs/ocfs2/localalloc.c | |
| parent | 73c8a80003d13be54e2309865030404441075182 | |
ocfs2: increase the default size of local alloc windows
I have observed that the current size of 8M gives us pretty poor
fragmentation on multi-threaded workloads which do lots of writes.
Generally, I can increase the size of local alloc windows and observe a
marked decrease in fragmentation, even up to and beyond window sizes of 512
megabytes. This makes sense for a couple of reasons - a larger local alloc means
more room for reservation windows. On multi-node workloads the larger local
alloc helps as well because we don't have to do window slides as often.
Also, I removed the OCFS2_DEFAULT_LOCAL_ALLOC_SIZE constant as it is no
longer used and the comment above it was out of date.
To test fragmentation, I used a workload which launched 4 threads that did
4k writes into a series of about 140 alternating files.
With resv_level=2 and a 4k/4k (blocksize/clustersize) file system, I observed
the following average fragmentation for various localalloc= parameters:
| localalloc= (MB) | avg. fragmentation |
| --- | --- |
| 8 | 48 |
| 32 | 16 |
| 64 | 10 |
| 120 | 7 |
On larger cluster sizes, the difference is more dramatic.
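For reference, the workload above can be approximated with a small user-space program like the sketch below: four threads doing 4k writes that alternate round-robin across a shared set of about 140 files. The exact harness is not part of the commit, so the file names, write counts, append pattern, and total data size here are assumptions for illustration only.

```c
/*
 * Rough sketch of the fragmentation workload: 4 threads append 4k
 * buffers round-robin across a shared set of ~140 files so their
 * allocations interleave. Names, counts, and sizes are illustrative
 * assumptions, not the original test harness.
 */
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define NUM_THREADS	4
#define NUM_FILES	140
#define WRITE_SIZE	4096
#define WRITES_PER_FILE	256	/* 1MB per file per thread (illustrative) */

static int fds[NUM_FILES];

static void *writer(void *arg)
{
	char buf[WRITE_SIZE];
	long id = (long)arg;
	int i, f;

	memset(buf, 'a' + id, sizeof(buf));
	for (i = 0; i < WRITES_PER_FILE; i++) {
		/* alternate across all files so extents interleave */
		for (f = 0; f < NUM_FILES; f++) {
			if (write(fds[f], buf, sizeof(buf)) < 0)
				perror("write");
		}
	}
	return NULL;
}

int main(void)
{
	pthread_t threads[NUM_THREADS];
	char name[64];
	long t;
	int f;

	for (f = 0; f < NUM_FILES; f++) {
		snprintf(name, sizeof(name), "frag-test-%03d", f);
		fds[f] = open(name, O_WRONLY | O_CREAT | O_APPEND, 0644);
		if (fds[f] < 0) {
			perror("open");
			return 1;
		}
	}

	for (t = 0; t < NUM_THREADS; t++)
		pthread_create(&threads[t], NULL, writer, (void *)t);
	for (t = 0; t < NUM_THREADS; t++)
		pthread_join(threads[t], NULL);

	for (f = 0; f < NUM_FILES; f++)
		close(fds[f]);
	return 0;
}
```

Per-file fragmentation could then be sampled with a tool such as filefrag and averaged across the files; the commit does not say how fragmentation was counted, so that too is an assumption. Each run would presumably remount the file system with a different localalloc= value.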
The new default size tops out at 256M, which we'll only get for cluster
sizes of 32K and above.
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Diffstat (limited to 'fs/ocfs2/localalloc.c')
-rw-r--r--  fs/ocfs2/localalloc.c  114
1 file changed, 112 insertions, 2 deletions
```diff
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index e39a3e7146c9..00022aac2e8c 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -75,10 +75,120 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
 					   struct inode *local_alloc_inode);
 
+/*
+ * ocfs2_la_default_mb() - determine a default size, in megabytes of
+ * the local alloc.
+ *
+ * Generally, we'd like to pick as large a local alloc as
+ * possible. Performance on large workloads tends to scale
+ * proportionally to la size. In addition to that, the reservations
+ * code functions more efficiently as it can reserve more windows for
+ * write.
+ *
+ * Some things work against us when trying to choose a large local alloc:
+ *
+ * - We need to ensure our sizing is picked to leave enough space in
+ *   group descriptors for other allocations (such as block groups,
+ *   etc). Picking default sizes which are a multiple of 4 could help
+ *   - block groups are allocated in 2mb and 4mb chunks.
+ *
+ * - Likewise, we don't want to starve other nodes of bits on small
+ *   file systems. This can easily be taken care of by limiting our
+ *   default to a reasonable size (256M) on larger cluster sizes.
+ *
+ * - Some file systems can't support very large sizes - 4k and 8k in
+ *   particular are limited to less than 128 and 256 megabytes respectively.
+ *
+ * The following reference table shows group descriptor and local
+ * alloc maximums at various cluster sizes (4k blocksize)
+ *
+ * csize: 4K	group: 126M	la: 121M
+ * csize: 8K	group: 252M	la: 243M
+ * csize: 16K	group: 504M	la: 486M
+ * csize: 32K	group: 1008M	la: 972M
+ * csize: 64K	group: 2016M	la: 1944M
+ * csize: 128K	group: 4032M	la: 3888M
+ * csize: 256K	group: 8064M	la: 7776M
+ * csize: 512K	group: 16128M	la: 15552M
+ * csize: 1024K	group: 32256M	la: 31104M
+ */
+#define OCFS2_LA_MAX_DEFAULT_MB	256
+#define OCFS2_LA_OLD_DEFAULT	8
+unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
+{
+	unsigned int la_mb;
+	unsigned int gd_mb;
+	unsigned int megs_per_slot;
+	struct super_block *sb = osb->sb;
+
+	gd_mb = ocfs2_clusters_to_megabytes(osb->sb,
+					    8 * ocfs2_group_bitmap_size(sb));
+
+	/*
+	 * This takes care of files systems with very small group
+	 * descriptors - 512 byte blocksize at cluster sizes lower
+	 * than 16K and also 1k blocksize with 4k cluster size.
+	 */
+	if ((sb->s_blocksize == 512 && osb->s_clustersize <= 8192)
+	    || (sb->s_blocksize == 1024 && osb->s_clustersize == 4096))
+		return OCFS2_LA_OLD_DEFAULT;
+
+	/*
+	 * Leave enough room for some block groups and make the final
+	 * value we work from a multiple of 4.
+	 */
+	gd_mb -= 16;
+	gd_mb &= 0xFFFFFFFB;
+
+	la_mb = gd_mb;
+
+	/*
+	 * Keep window sizes down to a reasonable default
+	 */
+	if (la_mb > OCFS2_LA_MAX_DEFAULT_MB) {
+		/*
+		 * Some clustersize / blocksize combinations will have
+		 * given us a larger than OCFS2_LA_MAX_DEFAULT_MB
+		 * default size, but get poor distribution when
+		 * limited to exactly 256 megabytes.
+		 *
+		 * As an example, 16K clustersize at 4K blocksize
+		 * gives us a cluster group size of 504M. Paring the
+		 * local alloc size down to 256 however, would give us
+		 * only one window and around 200MB left in the
+		 * cluster group. Instead, find the first size below
+		 * 256 which would give us an even distribution.
+		 *
+		 * Larger cluster group sizes actually work out pretty
+		 * well when pared to 256, so we don't have to do this
+		 * for any group that fits more than two
+		 * OCFS2_LA_MAX_DEFAULT_MB windows.
+		 */
+		if (gd_mb > (2 * OCFS2_LA_MAX_DEFAULT_MB))
+			la_mb = 256;
+		else {
+			unsigned int gd_mult = gd_mb;
+
+			while (gd_mult > 256)
+				gd_mult = gd_mult >> 1;
+
+			la_mb = gd_mult;
+		}
+	}
+
+	megs_per_slot = osb->osb_clusters_at_boot / osb->max_slots;
+	megs_per_slot = ocfs2_clusters_to_megabytes(osb->sb, megs_per_slot);
+	/* Too many nodes, too few disk clusters. */
+	if (megs_per_slot < la_mb)
+		la_mb = megs_per_slot;
+
+	return la_mb;
+}
+
 void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb)
 {
 	struct super_block *sb = osb->sb;
-	unsigned int la_default_mb = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE;
+	unsigned int la_default_mb = ocfs2_la_default_mb(osb);
 	unsigned int la_max_mb;
 
 	la_max_mb = ocfs2_clusters_to_megabytes(sb,
@@ -185,7 +295,7 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
 			  osb->local_alloc_bits, (osb->bitmap_cpg - 1));
 		osb->local_alloc_bits =
 			ocfs2_megabytes_to_clusters(osb->sb,
-						    OCFS2_DEFAULT_LOCAL_ALLOC_SIZE);
+						    ocfs2_la_default_mb(osb));
 	}
 
 	/* read the alloc off disk */
```
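As a rough cross-check of the behaviour described in the commit message, the sketch below models the sizing logic of ocfs2_la_default_mb() in user space, feeding it the group-descriptor sizes from the function's reference table (4k blocksize). It deliberately omits the small-blocksize and per-slot clamps of the real function, so its output is illustrative only, not what the kernel would compute on a given volume.

```c
/*
 * Stand-alone sketch (not kernel code): models the sizing heuristic of
 * ocfs2_la_default_mb() above, using the group-descriptor sizes from
 * the comment's reference table as inputs. The small-blocksize and
 * per-slot clamps of the real function are intentionally omitted.
 */
#include <stdio.h>

#define OCFS2_LA_MAX_DEFAULT_MB 256

static unsigned int model_la_default_mb(unsigned int gd_mb)
{
	unsigned int la_mb;

	/* Leave room for some block groups; mask as the kernel code does. */
	gd_mb -= 16;
	gd_mb &= 0xFFFFFFFB;

	la_mb = gd_mb;

	if (la_mb > OCFS2_LA_MAX_DEFAULT_MB) {
		if (gd_mb > (2 * OCFS2_LA_MAX_DEFAULT_MB)) {
			/* Large groups pare well to exactly 256M. */
			la_mb = 256;
		} else {
			unsigned int gd_mult = gd_mb;

			/* Halve until below 256 for an even distribution. */
			while (gd_mult > 256)
				gd_mult >>= 1;
			la_mb = gd_mult;
		}
	}
	return la_mb;
}

int main(void)
{
	/* Cluster size (K) and group size (MB) pairs from the table above. */
	static const unsigned int csize_k[] = { 4, 8, 16, 32, 64, 128, 256, 512, 1024 };
	static const unsigned int gd_mb[]   = { 126, 252, 504, 1008, 2016, 4032, 8064, 16128, 32256 };

	for (unsigned int i = 0; i < sizeof(gd_mb) / sizeof(gd_mb[0]); i++)
		printf("csize: %5uK  group: %6uM  default la: %4uM\n",
		       csize_k[i], gd_mb[i], model_la_default_mb(gd_mb[i]));
	return 0;
}
```

Running this shows the 256M cap only kicking in once the group size exceeds twice OCFS2_LA_MAX_DEFAULT_MB, i.e. at 32K clusters and above, matching the commit message; smaller cluster sizes instead get the halved, evenly distributed window size.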