diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-07-17 12:53:50 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:04 -0400 |
commit | e6dcd2dc9c489108648e2ed543315dd134d50a9a (patch) | |
tree | cddf6f588b65e28c5feb8bff89b22d8ff70f8a50 /fs/btrfs/file-item.c | |
parent | 77a41afb7d0dd0f27b6f2f1a5bc701929c7034de (diff) |
Btrfs: New data=ordered implementation
The old data=ordered code would force commit to wait until
all the data extents from the transaction were fully on disk. This
introduced large latencies into the commit and stalled new writers
in the transaction for a long time.
The new code changes the way data allocations and extents work:
* When delayed allocation is filled, data extents are reserved, and
the extent bit EXTENT_ORDERED is set on the entire range of the extent.
A struct btrfs_ordered_extent is allocated and inserted into a per-inode
rbtree to track the pending extents.
* As each page is written EXTENT_ORDERED is cleared on the bytes corresponding
to that page.
* When all of the bytes corresponding to a single struct btrfs_ordered_extent
are written, the previously reserved extent is inserted into the FS
btree and into the extent allocation trees. The checksums for the file
data are also updated.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/file-item.c')
-rw-r--r-- | fs/btrfs/file-item.c | 62 |
1 files changed, 36 insertions, 26 deletions
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index f537eb43c2c6..345caf8ff516 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -135,26 +135,37 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, | |||
135 | } | 135 | } |
136 | 136 | ||
137 | int btrfs_csum_one_bio(struct btrfs_root *root, | 137 | int btrfs_csum_one_bio(struct btrfs_root *root, |
138 | struct bio *bio, char **sums_ret) | 138 | struct bio *bio, struct btrfs_ordered_sum **sums_ret) |
139 | { | 139 | { |
140 | u32 *sums; | 140 | struct btrfs_ordered_sum *sums; |
141 | struct btrfs_sector_sum *sector_sum; | ||
141 | char *data; | 142 | char *data; |
142 | struct bio_vec *bvec = bio->bi_io_vec; | 143 | struct bio_vec *bvec = bio->bi_io_vec; |
143 | int bio_index = 0; | 144 | int bio_index = 0; |
144 | 145 | ||
145 | sums = kmalloc(bio->bi_vcnt * BTRFS_CRC32_SIZE, GFP_NOFS); | 146 | WARN_ON(bio->bi_vcnt <= 0); |
147 | sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS); | ||
146 | if (!sums) | 148 | if (!sums) |
147 | return -ENOMEM; | 149 | return -ENOMEM; |
148 | *sums_ret = (char *)sums; | 150 | *sums_ret = sums; |
151 | sector_sum = &sums->sums; | ||
152 | sums->file_offset = page_offset(bvec->bv_page); | ||
153 | sums->len = bio->bi_size; | ||
154 | INIT_LIST_HEAD(&sums->list); | ||
149 | 155 | ||
150 | while(bio_index < bio->bi_vcnt) { | 156 | while(bio_index < bio->bi_vcnt) { |
151 | data = kmap_atomic(bvec->bv_page, KM_USER0); | 157 | data = kmap_atomic(bvec->bv_page, KM_USER0); |
152 | *sums = ~(u32)0; | 158 | sector_sum->sum = ~(u32)0; |
153 | *sums = btrfs_csum_data(root, data + bvec->bv_offset, | 159 | sector_sum->sum = btrfs_csum_data(root, |
154 | *sums, bvec->bv_len); | 160 | data + bvec->bv_offset, |
161 | sector_sum->sum, | ||
162 | bvec->bv_len); | ||
155 | kunmap_atomic(data, KM_USER0); | 163 | kunmap_atomic(data, KM_USER0); |
156 | btrfs_csum_final(*sums, (char *)sums); | 164 | btrfs_csum_final(sector_sum->sum, |
157 | sums++; | 165 | (char *)&sector_sum->sum); |
166 | sector_sum->offset = page_offset(bvec->bv_page) + | ||
167 | bvec->bv_offset; | ||
168 | sector_sum++; | ||
158 | bio_index++; | 169 | bio_index++; |
159 | bvec++; | 170 | bvec++; |
160 | } | 171 | } |
@@ -163,7 +174,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, | |||
163 | 174 | ||
164 | int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, | 175 | int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, |
165 | struct btrfs_root *root, struct inode *inode, | 176 | struct btrfs_root *root, struct inode *inode, |
166 | struct bio *bio, char *sums) | 177 | struct btrfs_ordered_sum *sums) |
167 | { | 178 | { |
168 | u64 objectid = inode->i_ino; | 179 | u64 objectid = inode->i_ino; |
169 | u64 offset; | 180 | u64 offset; |
@@ -171,17 +182,16 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, | |||
171 | struct btrfs_key file_key; | 182 | struct btrfs_key file_key; |
172 | struct btrfs_key found_key; | 183 | struct btrfs_key found_key; |
173 | u64 next_offset; | 184 | u64 next_offset; |
185 | u64 total_bytes = 0; | ||
174 | int found_next; | 186 | int found_next; |
175 | struct btrfs_path *path; | 187 | struct btrfs_path *path; |
176 | struct btrfs_csum_item *item; | 188 | struct btrfs_csum_item *item; |
177 | struct btrfs_csum_item *item_end; | 189 | struct btrfs_csum_item *item_end; |
178 | struct extent_buffer *leaf = NULL; | 190 | struct extent_buffer *leaf = NULL; |
179 | u64 csum_offset; | 191 | u64 csum_offset; |
180 | u32 *sums32 = (u32 *)sums; | 192 | struct btrfs_sector_sum *sector_sum; |
181 | u32 nritems; | 193 | u32 nritems; |
182 | u32 ins_size; | 194 | u32 ins_size; |
183 | int bio_index = 0; | ||
184 | struct bio_vec *bvec = bio->bi_io_vec; | ||
185 | char *eb_map; | 195 | char *eb_map; |
186 | char *eb_token; | 196 | char *eb_token; |
187 | unsigned long map_len; | 197 | unsigned long map_len; |
@@ -189,10 +199,11 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, | |||
189 | 199 | ||
190 | path = btrfs_alloc_path(); | 200 | path = btrfs_alloc_path(); |
191 | BUG_ON(!path); | 201 | BUG_ON(!path); |
202 | sector_sum = &sums->sums; | ||
192 | again: | 203 | again: |
193 | next_offset = (u64)-1; | 204 | next_offset = (u64)-1; |
194 | found_next = 0; | 205 | found_next = 0; |
195 | offset = page_offset(bvec->bv_page) + bvec->bv_offset; | 206 | offset = sector_sum->offset; |
196 | file_key.objectid = objectid; | 207 | file_key.objectid = objectid; |
197 | file_key.offset = offset; | 208 | file_key.offset = offset; |
198 | btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); | 209 | btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); |
@@ -303,7 +314,7 @@ found: | |||
303 | item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + | 314 | item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + |
304 | btrfs_item_size_nr(leaf, path->slots[0])); | 315 | btrfs_item_size_nr(leaf, path->slots[0])); |
305 | eb_token = NULL; | 316 | eb_token = NULL; |
306 | next_bvec: | 317 | next_sector: |
307 | 318 | ||
308 | if (!eb_token || | 319 | if (!eb_token || |
309 | (unsigned long)item + BTRFS_CRC32_SIZE >= map_start + map_len) { | 320 | (unsigned long)item + BTRFS_CRC32_SIZE >= map_start + map_len) { |
@@ -321,21 +332,20 @@ next_bvec: | |||
321 | } | 332 | } |
322 | if (eb_token) { | 333 | if (eb_token) { |
323 | memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)), | 334 | memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)), |
324 | sums32, BTRFS_CRC32_SIZE); | 335 | &sector_sum->sum, BTRFS_CRC32_SIZE); |
325 | } else { | 336 | } else { |
326 | write_extent_buffer(leaf, sums32, (unsigned long)item, | 337 | write_extent_buffer(leaf, &sector_sum->sum, |
327 | BTRFS_CRC32_SIZE); | 338 | (unsigned long)item, BTRFS_CRC32_SIZE); |
328 | } | 339 | } |
329 | bio_index++; | 340 | total_bytes += root->sectorsize; |
330 | bvec++; | 341 | sector_sum++; |
331 | sums32++; | 342 | if (total_bytes < sums->len) { |
332 | if (bio_index < bio->bi_vcnt) { | ||
333 | item = (struct btrfs_csum_item *)((char *)item + | 343 | item = (struct btrfs_csum_item *)((char *)item + |
334 | BTRFS_CRC32_SIZE); | 344 | BTRFS_CRC32_SIZE); |
335 | if (item < item_end && offset + PAGE_CACHE_SIZE == | 345 | if (item < item_end && offset + PAGE_CACHE_SIZE == |
336 | page_offset(bvec->bv_page)) { | 346 | sector_sum->offset) { |
337 | offset = page_offset(bvec->bv_page); | 347 | offset = sector_sum->offset; |
338 | goto next_bvec; | 348 | goto next_sector; |
339 | } | 349 | } |
340 | } | 350 | } |
341 | if (eb_token) { | 351 | if (eb_token) { |
@@ -343,7 +353,7 @@ next_bvec: | |||
343 | eb_token = NULL; | 353 | eb_token = NULL; |
344 | } | 354 | } |
345 | btrfs_mark_buffer_dirty(path->nodes[0]); | 355 | btrfs_mark_buffer_dirty(path->nodes[0]); |
346 | if (bio_index < bio->bi_vcnt) { | 356 | if (total_bytes < sums->len) { |
347 | btrfs_release_path(root, path); | 357 | btrfs_release_path(root, path); |
348 | goto again; | 358 | goto again; |
349 | } | 359 | } |