author	Josef Bacik <josef@redhat.com>	2010-08-25 16:54:15 -0400
committer	Chris Mason <chris.mason@oracle.com>	2010-10-29 09:26:35 -0400
commit	9d66e233c7042da27ec699453770f41e567a0442 (patch)
tree	27fd70c6c07cb96a48123bdec07e9c2feed90f13 /fs
parent	0cb59c9953171e9adf6da8142a5c85ceb77bb60d (diff)
Btrfs: load free space cache if it exists
This patch actually loads the free space cache if it exists. The only thing
that really changes here is that we need to cache the block group if we're
going to remove an extent from it. Previously we did not do this, since the
caching kthread would pick it up. With the on-disk cache we don't have this
luxury, so we need to make sure we read the on-disk cache in first and then
remove the extent; that way, when the extent is unpinned, the free space is
added back to the block group. This has been tested with all sorts of things.

Signed-off-by: Josef Bacik <josef@redhat.com>
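For orientation, the control flow this patch adds to cache_block_group() can
be summarized with the following sketch. This is a simplified illustration,
not the verbatim kernel code: locking and cache-state bookkeeping are elided,
and start_caching_kthread() is a hypothetical stand-in for the async caching
setup that the real function performs inline.

	/*
	 * Simplified sketch (assumptions noted above): try the on-disk free
	 * space cache first, and only fall back to the slow async scan when
	 * the caller asked for a full cache build (load_cache_only == 0).
	 */
	static int cache_block_group_sketch(struct btrfs_block_group_cache *cache,
					    struct btrfs_trans_handle *trans,
					    int load_cache_only)
	{
		if (cache->cached != BTRFS_CACHE_NO)
			return 0;	/* already cached, or caching in progress */

		/*
		 * Reading the cache inode requires normal tree locking, which
		 * is not safe while a transaction commit is in progress.
		 */
		if (!trans->transaction->in_commit) {
			if (load_free_space_cache(cache->fs_info, cache) == 1)
				return 0;	/* on-disk cache was valid, done */
		}

		if (load_cache_only)
			return 0;	/* caller wanted only the cheap attempt */

		/* Hypothetical stand-in for the existing kthread-based scan. */
		return start_caching_kthread(cache);
	}

Callers that only want the cheap attempt pass load_cache_only == 1
(update_block_group() and the allocator's first pass below); callers that
need a full cache pass 0 and fall through to the async scan as before.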
Diffstat (limited to 'fs')
-rw-r--r--	fs/btrfs/extent-tree.c	50
-rw-r--r--	fs/btrfs/free-space-cache.c	296
-rw-r--r--	fs/btrfs/free-space-cache.h	2
3 files changed, 345 insertions(+), 3 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index d5455a2bf60b..9a325e465ad9 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -421,7 +421,9 @@ err:
 	return 0;
 }
 
-static int cache_block_group(struct btrfs_block_group_cache *cache)
+static int cache_block_group(struct btrfs_block_group_cache *cache,
+			     struct btrfs_trans_handle *trans,
+			     int load_cache_only)
 {
 	struct btrfs_fs_info *fs_info = cache->fs_info;
 	struct btrfs_caching_control *caching_ctl;
@@ -432,6 +434,36 @@ static int cache_block_group(struct btrfs_block_group_cache *cache)
 	if (cache->cached != BTRFS_CACHE_NO)
 		return 0;
 
+	/*
+	 * We can't do the read from on-disk cache during a commit since we need
+	 * to have the normal tree locking.
+	 */
+	if (!trans->transaction->in_commit) {
+		spin_lock(&cache->lock);
+		if (cache->cached != BTRFS_CACHE_NO) {
+			spin_unlock(&cache->lock);
+			return 0;
+		}
+		cache->cached = BTRFS_CACHE_STARTED;
+		spin_unlock(&cache->lock);
+
+		ret = load_free_space_cache(fs_info, cache);
+
+		spin_lock(&cache->lock);
+		if (ret == 1) {
+			cache->cached = BTRFS_CACHE_FINISHED;
+			cache->last_byte_to_unpin = (u64)-1;
+		} else {
+			cache->cached = BTRFS_CACHE_NO;
+		}
+		spin_unlock(&cache->lock);
+		if (ret == 1)
+			return 0;
+	}
+
+	if (load_cache_only)
+		return 0;
+
 	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
 	BUG_ON(!caching_ctl);
 
@@ -3984,6 +4016,14 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 			factor = 2;
 		else
 			factor = 1;
+		/*
+		 * If this block group has free space cache written out, we
+		 * need to make sure to load it if we are removing space. This
+		 * is because we need the unpinning stage to actually add the
+		 * space back to the block group, otherwise we will leak space.
+		 */
+		if (!alloc && cache->cached == BTRFS_CACHE_NO)
+			cache_block_group(cache, trans, 1);
 
 		byte_in_group = bytenr - cache->key.objectid;
 		WARN_ON(byte_in_group > cache->key.offset);
@@ -4828,6 +4868,10 @@ have_block_group:
 		if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
 			u64 free_percent;
 
+			ret = cache_block_group(block_group, trans, 1);
+			if (block_group->cached == BTRFS_CACHE_FINISHED)
+				goto have_block_group;
+
 			free_percent = btrfs_block_group_used(&block_group->item);
 			free_percent *= 100;
 			free_percent = div64_u64(free_percent,
@@ -4848,7 +4892,7 @@ have_block_group:
 			if (loop > LOOP_CACHING_NOWAIT ||
 			    (loop > LOOP_FIND_IDEAL &&
 			     atomic_read(&space_info->caching_threads) < 2)) {
-				ret = cache_block_group(block_group);
+				ret = cache_block_group(block_group, trans, 0);
 				BUG_ON(ret);
 			}
 			found_uncached_bg = true;
@@ -5405,7 +5449,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
 	u64 num_bytes = ins->offset;
 
 	block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
-	cache_block_group(block_group);
+	cache_block_group(block_group, trans, 0);
 	caching_ctl = get_caching_control(block_group);
 
 	if (!caching_ctl) {
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 7f972e59cc04..baa193423fb8 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -187,6 +187,302 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
 	return btrfs_update_inode(trans, root, inode);
 }
 
+static int readahead_cache(struct inode *inode)
+{
+	struct file_ra_state *ra;
+	unsigned long last_index;
+
+	ra = kzalloc(sizeof(*ra), GFP_NOFS);
+	if (!ra)
+		return -ENOMEM;
+
+	file_ra_state_init(ra, inode->i_mapping);
+	last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+
+	page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
+
+	kfree(ra);
+
+	return 0;
+}
+
+int load_free_space_cache(struct btrfs_fs_info *fs_info,
+			  struct btrfs_block_group_cache *block_group)
+{
+	struct btrfs_root *root = fs_info->tree_root;
+	struct inode *inode;
+	struct btrfs_free_space_header *header;
+	struct extent_buffer *leaf;
+	struct page *page;
+	struct btrfs_path *path;
+	u32 *checksums = NULL, *crc;
+	char *disk_crcs = NULL;
+	struct btrfs_key key;
+	struct list_head bitmaps;
+	u64 num_entries;
+	u64 num_bitmaps;
+	u64 generation;
+	u32 cur_crc = ~(u32)0;
+	pgoff_t index = 0;
+	unsigned long first_page_offset;
+	int num_checksums;
+	int ret = 0;
+
+	/*
+	 * If we're unmounting then just return, since this does a search on the
+	 * normal root and not the commit root and we could deadlock.
+	 */
+	smp_mb();
+	if (fs_info->closing)
+		return 0;
+
+	/*
+	 * If this block group has been marked to be cleared for one reason or
+	 * another then we can't trust the on disk cache, so just return.
+	 */
+	spin_lock(&block_group->lock);
+	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
+		printk(KERN_ERR "not reading block group %llu, dcs is %d\n",
+		       block_group->key.objectid, block_group->disk_cache_state);
+		spin_unlock(&block_group->lock);
+		return 0;
+	}
+	spin_unlock(&block_group->lock);
+
+	INIT_LIST_HEAD(&bitmaps);
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return 0;
+
+	inode = lookup_free_space_inode(root, block_group, path);
+	if (IS_ERR(inode)) {
+		btrfs_free_path(path);
+		return 0;
+	}
+
+	/* Nothing in the space cache, goodbye */
+	if (!i_size_read(inode)) {
+		btrfs_free_path(path);
+		goto out;
+	}
+
+	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+	key.offset = block_group->key.objectid;
+	key.type = 0;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret) {
+		btrfs_free_path(path);
+		goto out;
+	}
+
+	leaf = path->nodes[0];
+	header = btrfs_item_ptr(leaf, path->slots[0],
+				struct btrfs_free_space_header);
+	num_entries = btrfs_free_space_entries(leaf, header);
+	num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
+	generation = btrfs_free_space_generation(leaf, header);
+	btrfs_free_path(path);
+
+	if (BTRFS_I(inode)->generation != generation) {
+		printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
+		       " not match free space cache generation (%llu) for "
+		       "block group %llu\n",
+		       (unsigned long long)BTRFS_I(inode)->generation,
+		       (unsigned long long)generation,
+		       (unsigned long long)block_group->key.objectid);
+		goto out;
+	}
+
+	if (!num_entries)
+		goto out;
+
+	/* Setup everything for doing checksumming */
+	num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
+	checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
+	if (!checksums)
+		goto out;
+	first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
+	disk_crcs = kzalloc(first_page_offset, GFP_NOFS);
+	if (!disk_crcs)
+		goto out;
+
+	ret = readahead_cache(inode);
+	if (ret) {
+		ret = 0;
+		goto out;
+	}
+
+	while (1) {
+		struct btrfs_free_space_entry *entry;
+		struct btrfs_free_space *e;
+		void *addr;
+		unsigned long offset = 0;
+		unsigned long start_offset = 0;
+		int need_loop = 0;
+
+		if (!num_entries && !num_bitmaps)
+			break;
+
+		if (index == 0) {
+			start_offset = first_page_offset;
+			offset = start_offset;
+		}
+
+		page = grab_cache_page(inode->i_mapping, index);
+		if (!page) {
+			ret = 0;
+			goto free_cache;
+		}
+
+		if (!PageUptodate(page)) {
+			btrfs_readpage(NULL, page);
+			lock_page(page);
+			if (!PageUptodate(page)) {
+				unlock_page(page);
+				page_cache_release(page);
+				printk(KERN_ERR "btrfs: error reading free "
+				       "space cache: %llu\n",
+				       (unsigned long long)
+				       block_group->key.objectid);
+				goto free_cache;
+			}
+		}
+		addr = kmap(page);
+
+		if (index == 0) {
+			u64 *gen;
+
+			memcpy(disk_crcs, addr, first_page_offset);
+			gen = addr + (sizeof(u32) * num_checksums);
+			if (*gen != BTRFS_I(inode)->generation) {
+				printk(KERN_ERR "btrfs: space cache generation"
+				       " (%llu) does not match inode (%llu) "
+				       "for block group %llu\n",
+				       (unsigned long long)*gen,
+				       (unsigned long long)
+				       BTRFS_I(inode)->generation,
+				       (unsigned long long)
+				       block_group->key.objectid);
+				kunmap(page);
+				unlock_page(page);
+				page_cache_release(page);
+				goto free_cache;
+			}
+			crc = (u32 *)disk_crcs;
+		}
+		entry = addr + start_offset;
+
+		/* First lets check our crc before we do anything fun */
+		cur_crc = ~(u32)0;
+		cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc,
+					  PAGE_CACHE_SIZE - start_offset);
+		btrfs_csum_final(cur_crc, (char *)&cur_crc);
+		if (cur_crc != *crc) {
+			printk(KERN_ERR "btrfs: crc mismatch for page %lu in "
+			       "block group %llu\n", index,
+			       (unsigned long long)block_group->key.objectid);
+			kunmap(page);
+			unlock_page(page);
+			page_cache_release(page);
+			goto free_cache;
+		}
+		crc++;
+
+		while (1) {
+			if (!num_entries)
+				break;
+
+			need_loop = 1;
+			e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
+			if (!e) {
+				kunmap(page);
+				unlock_page(page);
+				page_cache_release(page);
+				goto free_cache;
+			}
+
+			e->offset = le64_to_cpu(entry->offset);
+			e->bytes = le64_to_cpu(entry->bytes);
+			if (!e->bytes) {
+				kunmap(page);
+				kfree(e);
+				unlock_page(page);
+				page_cache_release(page);
+				goto free_cache;
+			}
+
+			if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
+				spin_lock(&block_group->tree_lock);
+				ret = link_free_space(block_group, e);
+				spin_unlock(&block_group->tree_lock);
+				BUG_ON(ret);
+			} else {
+				e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
+				if (!e->bitmap) {
+					kunmap(page);
+					kfree(e);
+					unlock_page(page);
+					page_cache_release(page);
+					goto free_cache;
+				}
+				spin_lock(&block_group->tree_lock);
+				ret = link_free_space(block_group, e);
+				block_group->total_bitmaps++;
+				recalculate_thresholds(block_group);
+				spin_unlock(&block_group->tree_lock);
+				list_add_tail(&e->list, &bitmaps);
+			}
+
+			num_entries--;
+			offset += sizeof(struct btrfs_free_space_entry);
+			if (offset + sizeof(struct btrfs_free_space_entry) >=
+			    PAGE_CACHE_SIZE)
+				break;
+			entry++;
+		}
+
+		/*
+		 * We read an entry out of this page, we need to move on to the
+		 * next page.
+		 */
+		if (need_loop) {
+			kunmap(page);
+			goto next;
+		}
+
+		/*
+		 * We add the bitmaps at the end of the entries in order that
+		 * the bitmap entries are added to the cache.
+		 */
+		e = list_entry(bitmaps.next, struct btrfs_free_space, list);
+		list_del_init(&e->list);
+		memcpy(e->bitmap, addr, PAGE_CACHE_SIZE);
+		kunmap(page);
+		num_bitmaps--;
+next:
+		unlock_page(page);
+		page_cache_release(page);
+		index++;
+	}
+
+	ret = 1;
+out:
+	kfree(checksums);
+	kfree(disk_crcs);
+	iput(inode);
+	return ret;
+
+free_cache:
+	/* This cache is bogus, make sure it gets cleared */
+	spin_lock(&block_group->lock);
+	block_group->disk_cache_state = BTRFS_DC_CLEAR;
+	spin_unlock(&block_group->lock);
+	btrfs_remove_free_space_cache(block_group);
+	goto out;
+}
+
 int btrfs_write_out_cache(struct btrfs_root *root,
 			  struct btrfs_trans_handle *trans,
 			  struct btrfs_block_group_cache *block_group,
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 189f740bd3c0..e49ca5c321b5 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -39,6 +39,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
 				    struct btrfs_trans_handle *trans,
 				    struct btrfs_path *path,
 				    struct inode *inode);
+int load_free_space_cache(struct btrfs_fs_info *fs_info,
+			  struct btrfs_block_group_cache *block_group);
 int btrfs_write_out_cache(struct btrfs_root *root,
 			  struct btrfs_trans_handle *trans,
 			  struct btrfs_block_group_cache *block_group,