author		Josef Bacik <josef@redhat.com>	2010-07-02 12:14:14 -0400
committer	Chris Mason <chris.mason@oracle.com>	2010-10-29 09:26:29 -0400
commit		0cb59c9953171e9adf6da8142a5c85ceb77bb60d (patch)
tree		f72af47fa18815491814290a1b4907082bd9316d	/fs/btrfs/free-space-cache.c
parent		0af3d00bad38d3bb9912a60928ad0669f17bdb76 (diff)
Btrfs: write out free space cache
This is a simple bit: just dump the free space cache out to our preallocated inode when we're writing out dirty block groups. There are a bunch of changes in inode.c in order to account for special cases. Mostly, when we're doing the writeout we're holding trans_mutex, so we need to use the nolock transaction functions. Also we can't do asynchronous completions since the async thread could be blocked on already completed IO waiting for the transaction lock. This has been tested with xfstests and btrfs filesystem balance, as well as my ENOSPC tests. Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
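For orientation before the diff: the cache file written by btrfs_write_out_cache() below puts one u32 checksum per page of the file plus a u64 transaction generation at the start of page 0, and packs the free space entries (extent entries first, then one full page per bitmap) into the space after that header. The following stand-alone sketch of that header arithmetic is illustrative only; the type and constant names are hypothetical approximations of the kernel's, and a 4K page size is assumed.

/*
 * Sketch (not kernel code) of the free space cache layout produced by
 * the patch below.  Names here are illustrative stand-ins; assumes 4K pages.
 */
#include <stdint.h>
#include <stdio.h>

#define CACHE_PAGE_SIZE 4096UL

struct free_space_entry {	/* approximates btrfs_free_space_entry */
	uint64_t offset;
	uint64_t bytes;
	uint8_t  type;		/* extent entry or bitmap entry */
} __attribute__((packed));

int main(void)
{
	/* example size of the preallocated cache inode */
	unsigned long cache_isize = 16 * CACHE_PAGE_SIZE;
	/* one u32 checksum per page of the cache file */
	unsigned long num_checksums = cache_isize / CACHE_PAGE_SIZE;
	/* page 0 header: checksum array followed by the u64 generation */
	unsigned long first_page_offset =
		sizeof(uint32_t) * num_checksums + sizeof(uint64_t);

	printf("checksums: %lu, header bytes on page 0: %lu\n",
	       num_checksums, first_page_offset);
	printf("roughly %lu entries fit on page 0 after the header\n",
	       (unsigned long)((CACHE_PAGE_SIZE - first_page_offset) /
			       sizeof(struct free_space_entry)));
	return 0;
}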
Diffstat (limited to 'fs/btrfs/free-space-cache.c')
-rw-r--r--	fs/btrfs/free-space-cache.c	302
1 file changed, 302 insertions(+), 0 deletions(-)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 05efcc7061a7..7f972e59cc04 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -28,6 +28,11 @@
 #define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
 #define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)
 
+static void recalculate_thresholds(struct btrfs_block_group_cache
+				   *block_group);
+static int link_free_space(struct btrfs_block_group_cache *block_group,
+			   struct btrfs_free_space *info);
+
 struct inode *lookup_free_space_inode(struct btrfs_root *root,
				       struct btrfs_block_group_cache
				       *block_group, struct btrfs_path *path)
@@ -182,6 +187,303 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
 	return btrfs_update_inode(trans, root, inode);
 }
 
+int btrfs_write_out_cache(struct btrfs_root *root,
+			  struct btrfs_trans_handle *trans,
+			  struct btrfs_block_group_cache *block_group,
+			  struct btrfs_path *path)
+{
+	struct btrfs_free_space_header *header;
+	struct extent_buffer *leaf;
+	struct inode *inode;
+	struct rb_node *node;
+	struct list_head *pos, *n;
+	struct page *page;
+	struct extent_state *cached_state = NULL;
+	struct list_head bitmap_list;
+	struct btrfs_key key;
+	u64 bytes = 0;
+	u32 *crc, *checksums;
+	pgoff_t index = 0, last_index = 0;
+	unsigned long first_page_offset;
+	int num_checksums;
+	int entries = 0;
+	int bitmaps = 0;
+	int ret = 0;
+
+	root = root->fs_info->tree_root;
+
+	INIT_LIST_HEAD(&bitmap_list);
+
+	spin_lock(&block_group->lock);
+	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
+		spin_unlock(&block_group->lock);
+		return 0;
+	}
+	spin_unlock(&block_group->lock);
+
+	inode = lookup_free_space_inode(root, block_group, path);
+	if (IS_ERR(inode))
+		return 0;
+
+	if (!i_size_read(inode)) {
+		iput(inode);
+		return 0;
+	}
+
+	last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+	filemap_write_and_wait(inode->i_mapping);
+	btrfs_wait_ordered_range(inode, inode->i_size &
+				 ~(root->sectorsize - 1), (u64)-1);
+
+	/* We need a checksum per page. */
+	num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
+	crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
+	if (!crc) {
+		iput(inode);
+		return 0;
+	}
+
+	/* Since the first page has all of our checksums and our generation we
+	 * need to calculate the offset into the page that we can start writing
+	 * our entries.
+	 */
+	first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
+
+	node = rb_first(&block_group->free_space_offset);
+	if (!node)
+		goto out_free;
+
+	/*
+	 * Lock all pages first so we can lock the extent safely.
+	 *
+	 * NOTE: Because we hold the ref the entire time we're going to write to
+	 * the page find_get_page should never fail, so we don't do a check
+	 * after find_get_page at this point.  Just putting this here so people
+	 * know and don't freak out.
+	 */
+	while (index <= last_index) {
+		page = grab_cache_page(inode->i_mapping, index);
+		if (!page) {
+			pgoff_t i = 0;
+
+			while (i < index) {
+				page = find_get_page(inode->i_mapping, i);
+				unlock_page(page);
+				page_cache_release(page);
+				page_cache_release(page);
+				i++;
+			}
+			goto out_free;
+		}
+		index++;
+	}
+
+	index = 0;
+	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
+			 0, &cached_state, GFP_NOFS);
+
+	/* Write out the extent entries */
+	do {
+		struct btrfs_free_space_entry *entry;
+		void *addr;
+		unsigned long offset = 0;
+		unsigned long start_offset = 0;
+
+		if (index == 0) {
+			start_offset = first_page_offset;
+			offset = start_offset;
+		}
+
+		page = find_get_page(inode->i_mapping, index);
+
+		addr = kmap(page);
+		entry = addr + start_offset;
+
+		memset(addr, 0, PAGE_CACHE_SIZE);
+		while (1) {
+			struct btrfs_free_space *e;
+
+			e = rb_entry(node, struct btrfs_free_space, offset_index);
+			entries++;
+
+			entry->offset = cpu_to_le64(e->offset);
+			entry->bytes = cpu_to_le64(e->bytes);
+			if (e->bitmap) {
+				entry->type = BTRFS_FREE_SPACE_BITMAP;
+				list_add_tail(&e->list, &bitmap_list);
+				bitmaps++;
+			} else {
+				entry->type = BTRFS_FREE_SPACE_EXTENT;
+			}
+			node = rb_next(node);
+			if (!node)
+				break;
+			offset += sizeof(struct btrfs_free_space_entry);
+			if (offset + sizeof(struct btrfs_free_space_entry) >=
+			    PAGE_CACHE_SIZE)
+				break;
+			entry++;
+		}
+		*crc = ~(u32)0;
+		*crc = btrfs_csum_data(root, addr + start_offset, *crc,
+				       PAGE_CACHE_SIZE - start_offset);
+		kunmap(page);
+
+		btrfs_csum_final(*crc, (char *)crc);
+		crc++;
+
+		bytes += PAGE_CACHE_SIZE;
+
+		ClearPageChecked(page);
+		set_page_extent_mapped(page);
+		SetPageUptodate(page);
+		set_page_dirty(page);
+
+		/*
+		 * We need to release our reference we got for grab_cache_page,
+		 * except for the first page which will hold our checksums, we
+		 * do that below.
+		 */
+		if (index != 0) {
+			unlock_page(page);
+			page_cache_release(page);
+		}
+
+		page_cache_release(page);
+
+		index++;
+	} while (node);
+
+	/* Write out the bitmaps */
+	list_for_each_safe(pos, n, &bitmap_list) {
+		void *addr;
+		struct btrfs_free_space *entry =
+			list_entry(pos, struct btrfs_free_space, list);
+
+		page = find_get_page(inode->i_mapping, index);
+
+		addr = kmap(page);
+		memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
+		*crc = ~(u32)0;
+		*crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
+		kunmap(page);
+		btrfs_csum_final(*crc, (char *)crc);
+		crc++;
+		bytes += PAGE_CACHE_SIZE;
+
+		ClearPageChecked(page);
+		set_page_extent_mapped(page);
+		SetPageUptodate(page);
+		set_page_dirty(page);
+		unlock_page(page);
+		page_cache_release(page);
+		page_cache_release(page);
+		list_del_init(&entry->list);
+		index++;
+	}
+
+	/* Zero out the rest of the pages just to make sure */
+	while (index <= last_index) {
+		void *addr;
+
+		page = find_get_page(inode->i_mapping, index);
+
+		addr = kmap(page);
+		memset(addr, 0, PAGE_CACHE_SIZE);
+		kunmap(page);
+		ClearPageChecked(page);
+		set_page_extent_mapped(page);
+		SetPageUptodate(page);
+		set_page_dirty(page);
+		unlock_page(page);
+		page_cache_release(page);
+		page_cache_release(page);
+		bytes += PAGE_CACHE_SIZE;
+		index++;
+	}
+
+	btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
+
+	/* Write the checksums and trans id to the first page */
+	{
+		void *addr;
+		u64 *gen;
+
+		page = find_get_page(inode->i_mapping, 0);
+
+		addr = kmap(page);
+		memcpy(addr, checksums, sizeof(u32) * num_checksums);
+		gen = addr + (sizeof(u32) * num_checksums);
+		*gen = trans->transid;
+		kunmap(page);
+		ClearPageChecked(page);
+		set_page_extent_mapped(page);
+		SetPageUptodate(page);
+		set_page_dirty(page);
+		unlock_page(page);
+		page_cache_release(page);
+		page_cache_release(page);
+	}
+	BTRFS_I(inode)->generation = trans->transid;
+
+	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
+			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);
+
+	filemap_write_and_wait(inode->i_mapping);
+
+	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+	key.offset = block_group->key.objectid;
+	key.type = 0;
+
+	ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
+	if (ret < 0) {
+		ret = 0;
+		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
+				 EXTENT_DIRTY | EXTENT_DELALLOC |
+				 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
+		goto out_free;
+	}
+	leaf = path->nodes[0];
+	if (ret > 0) {
+		struct btrfs_key found_key;
+		BUG_ON(!path->slots[0]);
+		path->slots[0]--;
+		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+		if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
+		    found_key.offset != block_group->key.objectid) {
+			ret = 0;
+			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
+					 EXTENT_DIRTY | EXTENT_DELALLOC |
+					 EXTENT_DO_ACCOUNTING, 0, 0, NULL,
+					 GFP_NOFS);
+			btrfs_release_path(root, path);
+			goto out_free;
+		}
+	}
+	header = btrfs_item_ptr(leaf, path->slots[0],
+				struct btrfs_free_space_header);
+	btrfs_set_free_space_entries(leaf, header, entries);
+	btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
+	btrfs_set_free_space_generation(leaf, header, trans->transid);
+	btrfs_mark_buffer_dirty(leaf);
+	btrfs_release_path(root, path);
+
+	ret = 1;
+
+out_free:
+	if (ret == 0) {
+		invalidate_inode_pages2_range(inode->i_mapping, 0, index);
+		spin_lock(&block_group->lock);
+		block_group->disk_cache_state = BTRFS_DC_ERROR;
+		spin_unlock(&block_group->lock);
+		BTRFS_I(inode)->generation = 0;
+	}
+	kfree(checksums);
+	btrfs_update_inode(trans, root, inode);
+	iput(inode);
+	return ret;
+}
+
 static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
					   u64 offset)
 {