diff options
author | Steven Whitehouse <swhiteho@redhat.com> | 2006-05-12 12:09:15 -0400 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2006-05-12 12:09:15 -0400 |
commit | e90c01e148b967d30caf59e76accb3a58ca6b74b (patch) | |
tree | 92f9b45febbdbc52174307e2e73dbb26aa893465 /fs/gfs2 | |
parent | 7d63b54a65ce902f9aaa8efe8192aa3b983264d4 (diff) |
[GFS2] Reverse block order in build_height
The original code ordered the blocks allocated in the build_height
routine backwards, causing excessive disk seeks during a read of the
metadata. This patch reverses the order to try to reduce disk seeks.
Example: A five-level metadata tree, I = Inode, P = Pointers, D = Data
You need to read the blocks in the order:
I P5 P4 P3 P2 P1 D
in order to read a single data block. The new code now orders the blocks
in this way. The old code used to order them as:
I P1 P2 P3 P4 P5 D
requiring two extra seeks on average. Note that for files which are
grown by gradual extension rather than by truncate or by llseek/write
at a large offset, this doesn't apply. In the case of writing to a
file linearly, this routine will only be called upon to extend the
height of the tree by one block at a time, so the ordering is
determined by when it's called rather than by the internals of the
routine itself. Optimising that part of the ordering is a much
harder problem.
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs/gfs2')
-rw-r--r-- | fs/gfs2/bmap.c | 102 |
1 files changed, 46 insertions, 56 deletions
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 474b9a16f0f5..31c3e92820e4 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
@@ -164,72 +164,62 @@ static unsigned int calc_tree_height(struct gfs2_inode *ip, uint64_t size) | |||
164 | * @ip: The GFS2 inode | 164 | * @ip: The GFS2 inode |
165 | * @height: The height to build to | 165 | * @height: The height to build to |
166 | * | 166 | * |
167 | * This routine makes sure that the metadata tree is tall enough to hold | ||
168 | * "size" bytes of data. | ||
169 | * | 167 | * |
170 | * Returns: errno | 168 | * Returns: errno |
171 | */ | 169 | */ |
172 | 170 | ||
173 | static int build_height(struct gfs2_inode *ip, int height) | 171 | static int build_height(struct inode *inode, unsigned height) |
174 | { | 172 | { |
175 | struct gfs2_sbd *sdp = ip->i_sbd; | 173 | struct gfs2_inode *ip = inode->u.generic_ip; |
176 | struct buffer_head *bh, *dibh; | 174 | unsigned new_height = height - ip->i_di.di_height; |
177 | uint64_t block = 0, *bp; | 175 | struct buffer_head *dibh; |
178 | unsigned int x; | 176 | struct buffer_head *blocks[GFS2_MAX_META_HEIGHT]; |
179 | int new_block; | ||
180 | int error; | 177 | int error; |
178 | u64 *bp; | ||
179 | u64 bn; | ||
180 | unsigned n; | ||
181 | 181 | ||
182 | while (ip->i_di.di_height < height) { | 182 | if (height <= ip->i_di.di_height) |
183 | error = gfs2_meta_inode_buffer(ip, &dibh); | 183 | return 0; |
184 | if (error) | ||
185 | return error; | ||
186 | |||
187 | new_block = 0; | ||
188 | bp = (uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode)); | ||
189 | for (x = 0; x < sdp->sd_diptrs; x++, bp++) | ||
190 | if (*bp) { | ||
191 | new_block = 1; | ||
192 | break; | ||
193 | } | ||
194 | |||
195 | if (new_block) { | ||
196 | /* Get a new block, fill it with the old direct | ||
197 | pointers, and write it out */ | ||
198 | 184 | ||
199 | block = gfs2_alloc_meta(ip); | 185 | error = gfs2_meta_inode_buffer(ip, &dibh); |
186 | if (error) | ||
187 | return error; | ||
200 | 188 | ||
201 | bh = gfs2_meta_new(ip->i_gl, block); | 189 | for(n = 0; n < new_height; n++) { |
202 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | 190 | bn = gfs2_alloc_meta(ip); |
203 | gfs2_metatype_set(bh, | 191 | blocks[n] = gfs2_meta_new(ip->i_gl, bn); |
204 | GFS2_METATYPE_IN, | 192 | gfs2_trans_add_bh(ip->i_gl, blocks[n], 1); |
193 | } | ||
194 | |||
195 | n = 0; | ||
196 | bn = blocks[0]->b_blocknr; | ||
197 | if (new_height > 1) { | ||
198 | for(; n < new_height-1; n++) { | ||
199 | gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN, | ||
205 | GFS2_FORMAT_IN); | 200 | GFS2_FORMAT_IN); |
206 | gfs2_buffer_copy_tail(bh, | 201 | gfs2_buffer_clear_tail(blocks[n], |
207 | sizeof(struct gfs2_meta_header), | 202 | sizeof(struct gfs2_meta_header)); |
208 | dibh, sizeof(struct gfs2_dinode)); | 203 | bp = (u64 *)(blocks[n]->b_data + |
209 | 204 | sizeof(struct gfs2_meta_header)); | |
210 | brelse(bh); | 205 | *bp = cpu_to_be64(blocks[n+1]->b_blocknr); |
211 | } | 206 | brelse(blocks[n]); |
212 | 207 | blocks[n] = NULL; | |
213 | /* Set up the new direct pointer and write it out to disk */ | ||
214 | |||
215 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
216 | |||
217 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); | ||
218 | |||
219 | if (new_block) { | ||
220 | *(uint64_t *)(dibh->b_data + | ||
221 | sizeof(struct gfs2_dinode)) = | ||
222 | cpu_to_be64(block); | ||
223 | ip->i_di.di_blocks++; | ||
224 | } | 208 | } |
225 | |||
226 | ip->i_di.di_height++; | ||
227 | |||
228 | gfs2_dinode_out(&ip->i_di, dibh->b_data); | ||
229 | brelse(dibh); | ||
230 | } | 209 | } |
231 | 210 | gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN, GFS2_FORMAT_IN); | |
232 | return 0; | 211 | gfs2_buffer_copy_tail(blocks[n], sizeof(struct gfs2_meta_header), |
212 | dibh, sizeof(struct gfs2_dinode)); | ||
213 | brelse(blocks[n]); | ||
214 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
215 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); | ||
216 | bp = (u64 *)(dibh->b_data + sizeof(struct gfs2_dinode)); | ||
217 | *bp = cpu_to_be64(bn); | ||
218 | ip->i_di.di_height += new_height; | ||
219 | ip->i_di.di_blocks += new_height; | ||
220 | gfs2_dinode_out(&ip->i_di, dibh->b_data); | ||
221 | brelse(dibh); | ||
222 | return error; | ||
233 | } | 223 | } |
234 | 224 | ||
235 | /** | 225 | /** |
@@ -416,7 +406,7 @@ static struct buffer_head *gfs2_block_pointers(struct inode *inode, u64 lblock, | |||
416 | if (!create) | 406 | if (!create) |
417 | goto out; | 407 | goto out; |
418 | 408 | ||
419 | error = build_height(ip, height); | 409 | error = build_height(inode, height); |
420 | if (error) | 410 | if (error) |
421 | goto out; | 411 | goto out; |
422 | } | 412 | } |
@@ -806,7 +796,7 @@ static int do_grow(struct gfs2_inode *ip, uint64_t size) | |||
806 | h = calc_tree_height(ip, size); | 796 | h = calc_tree_height(ip, size); |
807 | if (ip->i_di.di_height < h) { | 797 | if (ip->i_di.di_height < h) { |
808 | down_write(&ip->i_rw_mutex); | 798 | down_write(&ip->i_rw_mutex); |
809 | error = build_height(ip, h); | 799 | error = build_height(ip->i_vnode, h); |
810 | up_write(&ip->i_rw_mutex); | 800 | up_write(&ip->i_rw_mutex); |
811 | if (error) | 801 | if (error) |
812 | goto out_end_trans; | 802 | goto out_end_trans; |