diff options
author | Li Zefan <lizf@cn.fujitsu.com> | 2011-04-19 22:06:11 -0400 |
---|---|---|
committer | Li Zefan <lizf@cn.fujitsu.com> | 2011-04-25 04:46:04 -0400 |
commit | 581bb050941b4f220f84d3e5ed6dace3d42dd382 (patch) | |
tree | 5ebd56af5eb3612f508419b188dfc18e959e7c94 /fs/btrfs/free-space-cache.c | |
parent | 34d52cb6c50b5a43901709998f59fb1c5a43dc4a (diff) |
Btrfs: Cache free inode numbers in memory
Currently btrfs stores the highest objectid of the fs tree, and it always
returns (highest+1) inode number when we create a file, so inode numbers
won't be reclaimed when we delete files, so we'll run out of inode numbers
as we keep creating and deleting files on 32-bit machines.
This fixes it, and it works similarly to how we cache free space in block
groups.
We start a kernel thread to read the file tree. By scanning inode items,
we know which chunks of inode numbers are free, and we cache them in
an rb-tree.
Because we are searching the commit root, we have to carefully handle the
cross-transaction case.
The rb-tree is a hybrid extent+bitmap tree, so if we have too many small
chunks of inode numbers, we'll use bitmaps. Initially we allow 16K ram
of extents, and a bitmap will be used if we exceed this threshold. The
extents threshold is adjusted in runtime.
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Diffstat (limited to 'fs/btrfs/free-space-cache.c')
-rw-r--r-- | fs/btrfs/free-space-cache.c | 96 |
1 files changed, 76 insertions, 20 deletions
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index d4fb4f077a79..2ce89bfc8815 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include "transaction.h" | 25 | #include "transaction.h" |
26 | #include "disk-io.h" | 26 | #include "disk-io.h" |
27 | #include "extent_io.h" | 27 | #include "extent_io.h" |
28 | #include "inode-map.h" | ||
28 | 29 | ||
29 | #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) | 30 | #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) |
30 | #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) | 31 | #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) |
@@ -105,7 +106,7 @@ int create_free_space_inode(struct btrfs_root *root, | |||
105 | u64 objectid; | 106 | u64 objectid; |
106 | int ret; | 107 | int ret; |
107 | 108 | ||
108 | ret = btrfs_find_free_objectid(trans, root, 0, &objectid); | 109 | ret = btrfs_find_free_objectid(root, &objectid); |
109 | if (ret < 0) | 110 | if (ret < 0) |
110 | return ret; | 111 | return ret; |
111 | 112 | ||
@@ -1496,10 +1497,9 @@ bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, | |||
1496 | return merged; | 1497 | return merged; |
1497 | } | 1498 | } |
1498 | 1499 | ||
1499 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | 1500 | int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, |
1500 | u64 offset, u64 bytes) | 1501 | u64 offset, u64 bytes) |
1501 | { | 1502 | { |
1502 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | ||
1503 | struct btrfs_free_space *info; | 1503 | struct btrfs_free_space *info; |
1504 | int ret = 0; | 1504 | int ret = 0; |
1505 | 1505 | ||
@@ -1751,11 +1751,29 @@ out: | |||
1751 | return 0; | 1751 | return 0; |
1752 | } | 1752 | } |
1753 | 1753 | ||
1754 | void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) | 1754 | void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) |
1755 | { | 1755 | { |
1756 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | ||
1757 | struct btrfs_free_space *info; | 1756 | struct btrfs_free_space *info; |
1758 | struct rb_node *node; | 1757 | struct rb_node *node; |
1758 | |||
1759 | spin_lock(&ctl->tree_lock); | ||
1760 | while ((node = rb_last(&ctl->free_space_offset)) != NULL) { | ||
1761 | info = rb_entry(node, struct btrfs_free_space, offset_index); | ||
1762 | unlink_free_space(ctl, info); | ||
1763 | kfree(info->bitmap); | ||
1764 | kmem_cache_free(btrfs_free_space_cachep, info); | ||
1765 | if (need_resched()) { | ||
1766 | spin_unlock(&ctl->tree_lock); | ||
1767 | cond_resched(); | ||
1768 | spin_lock(&ctl->tree_lock); | ||
1769 | } | ||
1770 | } | ||
1771 | spin_unlock(&ctl->tree_lock); | ||
1772 | } | ||
1773 | |||
1774 | void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) | ||
1775 | { | ||
1776 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | ||
1759 | struct btrfs_free_cluster *cluster; | 1777 | struct btrfs_free_cluster *cluster; |
1760 | struct list_head *head; | 1778 | struct list_head *head; |
1761 | 1779 | ||
@@ -1773,21 +1791,9 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) | |||
1773 | spin_lock(&ctl->tree_lock); | 1791 | spin_lock(&ctl->tree_lock); |
1774 | } | 1792 | } |
1775 | } | 1793 | } |
1776 | |||
1777 | while ((node = rb_last(&ctl->free_space_offset)) != NULL) { | ||
1778 | info = rb_entry(node, struct btrfs_free_space, offset_index); | ||
1779 | unlink_free_space(ctl, info); | ||
1780 | if (info->bitmap) | ||
1781 | kfree(info->bitmap); | ||
1782 | kmem_cache_free(btrfs_free_space_cachep, info); | ||
1783 | if (need_resched()) { | ||
1784 | spin_unlock(&ctl->tree_lock); | ||
1785 | cond_resched(); | ||
1786 | spin_lock(&ctl->tree_lock); | ||
1787 | } | ||
1788 | } | ||
1789 | |||
1790 | spin_unlock(&ctl->tree_lock); | 1794 | spin_unlock(&ctl->tree_lock); |
1795 | |||
1796 | __btrfs_remove_free_space_cache(ctl); | ||
1791 | } | 1797 | } |
1792 | 1798 | ||
1793 | u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, | 1799 | u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, |
@@ -2352,3 +2358,53 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | |||
2352 | 2358 | ||
2353 | return ret; | 2359 | return ret; |
2354 | } | 2360 | } |
2361 | |||
2362 | /* | ||
2363 | * Find the left-most item in the cache tree, and then return the | ||
2364 | * smallest inode number in the item. | ||
2365 | * | ||
2366 | * Note: the returned inode number may not be the smallest one in | ||
2367 | * the tree, if the left-most item is a bitmap. | ||
2368 | */ | ||
2369 | u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root) | ||
2370 | { | ||
2371 | struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl; | ||
2372 | struct btrfs_free_space *entry = NULL; | ||
2373 | u64 ino = 0; | ||
2374 | |||
2375 | spin_lock(&ctl->tree_lock); | ||
2376 | |||
2377 | if (RB_EMPTY_ROOT(&ctl->free_space_offset)) | ||
2378 | goto out; | ||
2379 | |||
2380 | entry = rb_entry(rb_first(&ctl->free_space_offset), | ||
2381 | struct btrfs_free_space, offset_index); | ||
2382 | |||
2383 | if (!entry->bitmap) { | ||
2384 | ino = entry->offset; | ||
2385 | |||
2386 | unlink_free_space(ctl, entry); | ||
2387 | entry->offset++; | ||
2388 | entry->bytes--; | ||
2389 | if (!entry->bytes) | ||
2390 | kmem_cache_free(btrfs_free_space_cachep, entry); | ||
2391 | else | ||
2392 | link_free_space(ctl, entry); | ||
2393 | } else { | ||
2394 | u64 offset = 0; | ||
2395 | u64 count = 1; | ||
2396 | int ret; | ||
2397 | |||
2398 | ret = search_bitmap(ctl, entry, &offset, &count); | ||
2399 | BUG_ON(ret); | ||
2400 | |||
2401 | ino = offset; | ||
2402 | bitmap_clear_bits(ctl, entry, offset, 1); | ||
2403 | if (entry->bytes == 0) | ||
2404 | free_bitmap(ctl, entry); | ||
2405 | } | ||
2406 | out: | ||
2407 | spin_unlock(&ctl->tree_lock); | ||
2408 | |||
2409 | return ino; | ||
2410 | } | ||