aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/Makefile2
-rw-r--r--fs/btrfs/acl.c5
-rw-r--r--fs/btrfs/btrfs_inode.h14
-rw-r--r--fs/btrfs/compression.c5
-rw-r--r--fs/btrfs/ctree.c18
-rw-r--r--fs/btrfs/ctree.h58
-rw-r--r--fs/btrfs/delayed-inode.c1695
-rw-r--r--fs/btrfs/delayed-inode.h141
-rw-r--r--fs/btrfs/dir-item.c34
-rw-r--r--fs/btrfs/disk-io.c69
-rw-r--r--fs/btrfs/disk-io.h1
-rw-r--r--fs/btrfs/export.c25
-rw-r--r--fs/btrfs/extent-tree.c95
-rw-r--r--fs/btrfs/extent_io.c4
-rw-r--r--fs/btrfs/file-item.c5
-rw-r--r--fs/btrfs/file.c27
-rw-r--r--fs/btrfs/free-space-cache.c973
-rw-r--r--fs/btrfs/free-space-cache.h47
-rw-r--r--fs/btrfs/inode-map.c428
-rw-r--r--fs/btrfs/inode-map.h13
-rw-r--r--fs/btrfs/inode.c405
-rw-r--r--fs/btrfs/ioctl.c46
-rw-r--r--fs/btrfs/relocation.c27
-rw-r--r--fs/btrfs/super.c10
-rw-r--r--fs/btrfs/sysfs.c12
-rw-r--r--fs/btrfs/transaction.c56
-rw-r--r--fs/btrfs/transaction.h2
-rw-r--r--fs/btrfs/tree-log.c61
-rw-r--r--fs/btrfs/xattr.c8
29 files changed, 3533 insertions, 753 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 31610ea73ae..a8411c22313 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -7,4 +7,4 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
7 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ 7 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
8 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ 8 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
9 export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \ 9 export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \
10 compression.o delayed-ref.o relocation.o 10 compression.o delayed-ref.o relocation.o delayed-inode.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 1a21c99a91b..f66fc995973 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -178,12 +178,13 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
178 178
179 if (value) { 179 if (value) {
180 acl = posix_acl_from_xattr(value, size); 180 acl = posix_acl_from_xattr(value, size);
181 if (IS_ERR(acl))
182 return PTR_ERR(acl);
183
181 if (acl) { 184 if (acl) {
182 ret = posix_acl_valid(acl); 185 ret = posix_acl_valid(acl);
183 if (ret) 186 if (ret)
184 goto out; 187 goto out;
185 } else if (IS_ERR(acl)) {
186 return PTR_ERR(acl);
187 } 188 }
188 } 189 }
189 190
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 57c3bb2884c..d0b0e43a6a8 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -22,6 +22,7 @@
22#include "extent_map.h" 22#include "extent_map.h"
23#include "extent_io.h" 23#include "extent_io.h"
24#include "ordered-data.h" 24#include "ordered-data.h"
25#include "delayed-inode.h"
25 26
26/* in memory btrfs inode */ 27/* in memory btrfs inode */
27struct btrfs_inode { 28struct btrfs_inode {
@@ -158,14 +159,27 @@ struct btrfs_inode {
158 */ 159 */
159 unsigned force_compress:4; 160 unsigned force_compress:4;
160 161
162 struct btrfs_delayed_node *delayed_node;
163
161 struct inode vfs_inode; 164 struct inode vfs_inode;
162}; 165};
163 166
167extern unsigned char btrfs_filetype_table[];
168
164static inline struct btrfs_inode *BTRFS_I(struct inode *inode) 169static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
165{ 170{
166 return container_of(inode, struct btrfs_inode, vfs_inode); 171 return container_of(inode, struct btrfs_inode, vfs_inode);
167} 172}
168 173
174static inline u64 btrfs_ino(struct inode *inode)
175{
176 u64 ino = BTRFS_I(inode)->location.objectid;
177
178 if (ino <= BTRFS_FIRST_FREE_OBJECTID)
179 ino = inode->i_ino;
180 return ino;
181}
182
169static inline void btrfs_i_size_write(struct inode *inode, u64 size) 183static inline void btrfs_i_size_write(struct inode *inode, u64 size)
170{ 184{
171 i_size_write(inode, size); 185 i_size_write(inode, size);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index d4cd0f0cd69..bfe42b03eaf 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -125,9 +125,10 @@ static int check_compressed_csum(struct inode *inode,
125 kunmap_atomic(kaddr, KM_USER0); 125 kunmap_atomic(kaddr, KM_USER0);
126 126
127 if (csum != *cb_sum) { 127 if (csum != *cb_sum) {
128 printk(KERN_INFO "btrfs csum failed ino %lu " 128 printk(KERN_INFO "btrfs csum failed ino %llu "
129 "extent %llu csum %u " 129 "extent %llu csum %u "
130 "wanted %u mirror %d\n", inode->i_ino, 130 "wanted %u mirror %d\n",
131 (unsigned long long)btrfs_ino(inode),
131 (unsigned long long)disk_start, 132 (unsigned long long)disk_start,
132 csum, *cb_sum, cb->mirror_num); 133 csum, *cb_sum, cb->mirror_num);
133 ret = -EIO; 134 ret = -EIO;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index fad8f23d70f..b6cbeed226b 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -38,11 +38,6 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
38 struct extent_buffer *src_buf); 38 struct extent_buffer *src_buf);
39static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 39static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
40 struct btrfs_path *path, int level, int slot); 40 struct btrfs_path *path, int level, int slot);
41static int setup_items_for_insert(struct btrfs_trans_handle *trans,
42 struct btrfs_root *root, struct btrfs_path *path,
43 struct btrfs_key *cpu_key, u32 *data_size,
44 u32 total_data, u32 total_size, int nr);
45
46 41
47struct btrfs_path *btrfs_alloc_path(void) 42struct btrfs_path *btrfs_alloc_path(void)
48{ 43{
@@ -74,8 +69,8 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)
74 * retake all the spinlocks in the path. You can safely use NULL 69 * retake all the spinlocks in the path. You can safely use NULL
75 * for held 70 * for held
76 */ 71 */
77static noinline void btrfs_clear_path_blocking(struct btrfs_path *p, 72noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
78 struct extent_buffer *held) 73 struct extent_buffer *held)
79{ 74{
80 int i; 75 int i;
81 76
@@ -3559,11 +3554,10 @@ out:
3559 * to save stack depth by doing the bulk of the work in a function 3554 * to save stack depth by doing the bulk of the work in a function
3560 * that doesn't call btrfs_search_slot 3555 * that doesn't call btrfs_search_slot
3561 */ 3556 */
3562static noinline_for_stack int 3557int setup_items_for_insert(struct btrfs_trans_handle *trans,
3563setup_items_for_insert(struct btrfs_trans_handle *trans, 3558 struct btrfs_root *root, struct btrfs_path *path,
3564 struct btrfs_root *root, struct btrfs_path *path, 3559 struct btrfs_key *cpu_key, u32 *data_size,
3565 struct btrfs_key *cpu_key, u32 *data_size, 3560 u32 total_data, u32 total_size, int nr)
3566 u32 total_data, u32 total_size, int nr)
3567{ 3561{
3568 struct btrfs_item *item; 3562 struct btrfs_item *item;
3569 int i; 3563 int i;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 343304dec6d..e7d40791ec9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -105,6 +105,12 @@ struct btrfs_ordered_sum;
105/* For storing free space cache */ 105/* For storing free space cache */
106#define BTRFS_FREE_SPACE_OBJECTID -11ULL 106#define BTRFS_FREE_SPACE_OBJECTID -11ULL
107 107
108/*
109 * The inode number assigned to the special inode for sotring
110 * free ino cache
111 */
112#define BTRFS_FREE_INO_OBJECTID -12ULL
113
108/* dummy objectid represents multiple objectids */ 114/* dummy objectid represents multiple objectids */
109#define BTRFS_MULTIPLE_OBJECTIDS -255ULL 115#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
110 116
@@ -830,9 +836,6 @@ struct btrfs_block_group_cache {
830 u64 bytes_super; 836 u64 bytes_super;
831 u64 flags; 837 u64 flags;
832 u64 sectorsize; 838 u64 sectorsize;
833 int extents_thresh;
834 int free_extents;
835 int total_bitmaps;
836 unsigned int ro:1; 839 unsigned int ro:1;
837 unsigned int dirty:1; 840 unsigned int dirty:1;
838 unsigned int iref:1; 841 unsigned int iref:1;
@@ -847,9 +850,7 @@ struct btrfs_block_group_cache {
847 struct btrfs_space_info *space_info; 850 struct btrfs_space_info *space_info;
848 851
849 /* free space cache stuff */ 852 /* free space cache stuff */
850 spinlock_t tree_lock; 853 struct btrfs_free_space_ctl *free_space_ctl;
851 struct rb_root free_space_offset;
852 u64 free_space;
853 854
854 /* block group cache stuff */ 855 /* block group cache stuff */
855 struct rb_node cache_node; 856 struct rb_node cache_node;
@@ -869,6 +870,7 @@ struct btrfs_block_group_cache {
869struct reloc_control; 870struct reloc_control;
870struct btrfs_device; 871struct btrfs_device;
871struct btrfs_fs_devices; 872struct btrfs_fs_devices;
873struct btrfs_delayed_root;
872struct btrfs_fs_info { 874struct btrfs_fs_info {
873 u8 fsid[BTRFS_FSID_SIZE]; 875 u8 fsid[BTRFS_FSID_SIZE];
874 u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; 876 u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
@@ -895,7 +897,10 @@ struct btrfs_fs_info {
895 /* logical->physical extent mapping */ 897 /* logical->physical extent mapping */
896 struct btrfs_mapping_tree mapping_tree; 898 struct btrfs_mapping_tree mapping_tree;
897 899
898 /* block reservation for extent, checksum and root tree */ 900 /*
901 * block reservation for extent, checksum, root tree and
902 * delayed dir index item
903 */
899 struct btrfs_block_rsv global_block_rsv; 904 struct btrfs_block_rsv global_block_rsv;
900 /* block reservation for delay allocation */ 905 /* block reservation for delay allocation */
901 struct btrfs_block_rsv delalloc_block_rsv; 906 struct btrfs_block_rsv delalloc_block_rsv;
@@ -1022,6 +1027,7 @@ struct btrfs_fs_info {
1022 * for the sys_munmap function call path 1027 * for the sys_munmap function call path
1023 */ 1028 */
1024 struct btrfs_workers fixup_workers; 1029 struct btrfs_workers fixup_workers;
1030 struct btrfs_workers delayed_workers;
1025 struct task_struct *transaction_kthread; 1031 struct task_struct *transaction_kthread;
1026 struct task_struct *cleaner_kthread; 1032 struct task_struct *cleaner_kthread;
1027 int thread_pool_size; 1033 int thread_pool_size;
@@ -1079,6 +1085,8 @@ struct btrfs_fs_info {
1079 1085
1080 /* filesystem state */ 1086 /* filesystem state */
1081 u64 fs_state; 1087 u64 fs_state;
1088
1089 struct btrfs_delayed_root *delayed_root;
1082}; 1090};
1083 1091
1084/* 1092/*
@@ -1107,6 +1115,16 @@ struct btrfs_root {
1107 spinlock_t accounting_lock; 1115 spinlock_t accounting_lock;
1108 struct btrfs_block_rsv *block_rsv; 1116 struct btrfs_block_rsv *block_rsv;
1109 1117
1118 /* free ino cache stuff */
1119 struct mutex fs_commit_mutex;
1120 struct btrfs_free_space_ctl *free_ino_ctl;
1121 enum btrfs_caching_type cached;
1122 spinlock_t cache_lock;
1123 wait_queue_head_t cache_wait;
1124 struct btrfs_free_space_ctl *free_ino_pinned;
1125 u64 cache_progress;
1126 struct inode *cache_inode;
1127
1110 struct mutex log_mutex; 1128 struct mutex log_mutex;
1111 wait_queue_head_t log_writer_wait; 1129 wait_queue_head_t log_writer_wait;
1112 wait_queue_head_t log_commit_wait[2]; 1130 wait_queue_head_t log_commit_wait[2];
@@ -1162,6 +1180,11 @@ struct btrfs_root {
1162 struct rb_root inode_tree; 1180 struct rb_root inode_tree;
1163 1181
1164 /* 1182 /*
1183 * radix tree that keeps track of delayed nodes of every inode,
1184 * protected by inode_lock
1185 */
1186 struct radix_tree_root delayed_nodes_tree;
1187 /*
1165 * right now this just gets used so that a root has its own devid 1188 * right now this just gets used so that a root has its own devid
1166 * for stat. It may be used for more later 1189 * for stat. It may be used for more later
1167 */ 1190 */
@@ -2034,6 +2057,13 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
2034} 2057}
2035 2058
2036/* extent-tree.c */ 2059/* extent-tree.c */
2060static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
2061 int num_items)
2062{
2063 return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
2064 3 * num_items;
2065}
2066
2037void btrfs_put_block_group(struct btrfs_block_group_cache *cache); 2067void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
2038int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 2068int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2039 struct btrfs_root *root, unsigned long count); 2069 struct btrfs_root *root, unsigned long count);
@@ -2226,6 +2256,8 @@ void btrfs_release_path(struct btrfs_path *p);
2226struct btrfs_path *btrfs_alloc_path(void); 2256struct btrfs_path *btrfs_alloc_path(void);
2227void btrfs_free_path(struct btrfs_path *p); 2257void btrfs_free_path(struct btrfs_path *p);
2228void btrfs_set_path_blocking(struct btrfs_path *p); 2258void btrfs_set_path_blocking(struct btrfs_path *p);
2259void btrfs_clear_path_blocking(struct btrfs_path *p,
2260 struct extent_buffer *held);
2229void btrfs_unlock_up_safe(struct btrfs_path *p, int level); 2261void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
2230 2262
2231int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2263int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -2237,6 +2269,10 @@ static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
2237 return btrfs_del_items(trans, root, path, path->slots[0], 1); 2269 return btrfs_del_items(trans, root, path, path->slots[0], 1);
2238} 2270}
2239 2271
2272int setup_items_for_insert(struct btrfs_trans_handle *trans,
2273 struct btrfs_root *root, struct btrfs_path *path,
2274 struct btrfs_key *cpu_key, u32 *data_size,
2275 u32 total_data, u32 total_size, int nr);
2240int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root 2276int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
2241 *root, struct btrfs_key *key, void *data, u32 data_size); 2277 *root, struct btrfs_key *key, void *data, u32 data_size);
2242int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, 2278int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
@@ -2293,7 +2329,7 @@ void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
2293/* dir-item.c */ 2329/* dir-item.c */
2294int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, 2330int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
2295 struct btrfs_root *root, const char *name, 2331 struct btrfs_root *root, const char *name,
2296 int name_len, u64 dir, 2332 int name_len, struct inode *dir,
2297 struct btrfs_key *location, u8 type, u64 index); 2333 struct btrfs_key *location, u8 type, u64 index);
2298struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, 2334struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
2299 struct btrfs_root *root, 2335 struct btrfs_root *root,
@@ -2338,12 +2374,6 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
2338 struct btrfs_root *root, u64 offset); 2374 struct btrfs_root *root, u64 offset);
2339int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset); 2375int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset);
2340 2376
2341/* inode-map.c */
2342int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
2343 struct btrfs_root *fs_root,
2344 u64 dirid, u64 *objectid);
2345int btrfs_find_highest_inode(struct btrfs_root *fs_root, u64 *objectid);
2346
2347/* inode-item.c */ 2377/* inode-item.c */
2348int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, 2378int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
2349 struct btrfs_root *root, 2379 struct btrfs_root *root,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
new file mode 100644
index 00000000000..01e29503a54
--- /dev/null
+++ b/fs/btrfs/delayed-inode.c
@@ -0,0 +1,1695 @@
1/*
2 * Copyright (C) 2011 Fujitsu. All rights reserved.
3 * Written by Miao Xie <miaox@cn.fujitsu.com>
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public
7 * License v2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public
15 * License along with this program; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 021110-1307, USA.
18 */
19
20#include <linux/slab.h>
21#include "delayed-inode.h"
22#include "disk-io.h"
23#include "transaction.h"
24
25#define BTRFS_DELAYED_WRITEBACK 400
26#define BTRFS_DELAYED_BACKGROUND 100
27
28static struct kmem_cache *delayed_node_cache;
29
30int __init btrfs_delayed_inode_init(void)
31{
32 delayed_node_cache = kmem_cache_create("delayed_node",
33 sizeof(struct btrfs_delayed_node),
34 0,
35 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
36 NULL);
37 if (!delayed_node_cache)
38 return -ENOMEM;
39 return 0;
40}
41
42void btrfs_delayed_inode_exit(void)
43{
44 if (delayed_node_cache)
45 kmem_cache_destroy(delayed_node_cache);
46}
47
48static inline void btrfs_init_delayed_node(
49 struct btrfs_delayed_node *delayed_node,
50 struct btrfs_root *root, u64 inode_id)
51{
52 delayed_node->root = root;
53 delayed_node->inode_id = inode_id;
54 atomic_set(&delayed_node->refs, 0);
55 delayed_node->count = 0;
56 delayed_node->in_list = 0;
57 delayed_node->inode_dirty = 0;
58 delayed_node->ins_root = RB_ROOT;
59 delayed_node->del_root = RB_ROOT;
60 mutex_init(&delayed_node->mutex);
61 delayed_node->index_cnt = 0;
62 INIT_LIST_HEAD(&delayed_node->n_list);
63 INIT_LIST_HEAD(&delayed_node->p_list);
64 delayed_node->bytes_reserved = 0;
65}
66
67static inline int btrfs_is_continuous_delayed_item(
68 struct btrfs_delayed_item *item1,
69 struct btrfs_delayed_item *item2)
70{
71 if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
72 item1->key.objectid == item2->key.objectid &&
73 item1->key.type == item2->key.type &&
74 item1->key.offset + 1 == item2->key.offset)
75 return 1;
76 return 0;
77}
78
79static inline struct btrfs_delayed_root *btrfs_get_delayed_root(
80 struct btrfs_root *root)
81{
82 return root->fs_info->delayed_root;
83}
84
85static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
86 struct inode *inode)
87{
88 struct btrfs_delayed_node *node;
89 struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
90 struct btrfs_root *root = btrfs_inode->root;
91 u64 ino = btrfs_ino(inode);
92 int ret;
93
94again:
95 node = ACCESS_ONCE(btrfs_inode->delayed_node);
96 if (node) {
97 atomic_inc(&node->refs); /* can be accessed */
98 return node;
99 }
100
101 spin_lock(&root->inode_lock);
102 node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
103 if (node) {
104 if (btrfs_inode->delayed_node) {
105 spin_unlock(&root->inode_lock);
106 goto again;
107 }
108 btrfs_inode->delayed_node = node;
109 atomic_inc(&node->refs); /* can be accessed */
110 atomic_inc(&node->refs); /* cached in the inode */
111 spin_unlock(&root->inode_lock);
112 return node;
113 }
114 spin_unlock(&root->inode_lock);
115
116 node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS);
117 if (!node)
118 return ERR_PTR(-ENOMEM);
119 btrfs_init_delayed_node(node, root, ino);
120
121 atomic_inc(&node->refs); /* cached in the btrfs inode */
122 atomic_inc(&node->refs); /* can be accessed */
123
124 ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
125 if (ret) {
126 kmem_cache_free(delayed_node_cache, node);
127 return ERR_PTR(ret);
128 }
129
130 spin_lock(&root->inode_lock);
131 ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
132 if (ret == -EEXIST) {
133 kmem_cache_free(delayed_node_cache, node);
134 spin_unlock(&root->inode_lock);
135 radix_tree_preload_end();
136 goto again;
137 }
138 btrfs_inode->delayed_node = node;
139 spin_unlock(&root->inode_lock);
140 radix_tree_preload_end();
141
142 return node;
143}
144
145/*
146 * Call it when holding delayed_node->mutex
147 *
148 * If mod = 1, add this node into the prepared list.
149 */
150static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
151 struct btrfs_delayed_node *node,
152 int mod)
153{
154 spin_lock(&root->lock);
155 if (node->in_list) {
156 if (!list_empty(&node->p_list))
157 list_move_tail(&node->p_list, &root->prepare_list);
158 else if (mod)
159 list_add_tail(&node->p_list, &root->prepare_list);
160 } else {
161 list_add_tail(&node->n_list, &root->node_list);
162 list_add_tail(&node->p_list, &root->prepare_list);
163 atomic_inc(&node->refs); /* inserted into list */
164 root->nodes++;
165 node->in_list = 1;
166 }
167 spin_unlock(&root->lock);
168}
169
170/* Call it when holding delayed_node->mutex */
171static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
172 struct btrfs_delayed_node *node)
173{
174 spin_lock(&root->lock);
175 if (node->in_list) {
176 root->nodes--;
177 atomic_dec(&node->refs); /* not in the list */
178 list_del_init(&node->n_list);
179 if (!list_empty(&node->p_list))
180 list_del_init(&node->p_list);
181 node->in_list = 0;
182 }
183 spin_unlock(&root->lock);
184}
185
186struct btrfs_delayed_node *btrfs_first_delayed_node(
187 struct btrfs_delayed_root *delayed_root)
188{
189 struct list_head *p;
190 struct btrfs_delayed_node *node = NULL;
191
192 spin_lock(&delayed_root->lock);
193 if (list_empty(&delayed_root->node_list))
194 goto out;
195
196 p = delayed_root->node_list.next;
197 node = list_entry(p, struct btrfs_delayed_node, n_list);
198 atomic_inc(&node->refs);
199out:
200 spin_unlock(&delayed_root->lock);
201
202 return node;
203}
204
205struct btrfs_delayed_node *btrfs_next_delayed_node(
206 struct btrfs_delayed_node *node)
207{
208 struct btrfs_delayed_root *delayed_root;
209 struct list_head *p;
210 struct btrfs_delayed_node *next = NULL;
211
212 delayed_root = node->root->fs_info->delayed_root;
213 spin_lock(&delayed_root->lock);
214 if (!node->in_list) { /* not in the list */
215 if (list_empty(&delayed_root->node_list))
216 goto out;
217 p = delayed_root->node_list.next;
218 } else if (list_is_last(&node->n_list, &delayed_root->node_list))
219 goto out;
220 else
221 p = node->n_list.next;
222
223 next = list_entry(p, struct btrfs_delayed_node, n_list);
224 atomic_inc(&next->refs);
225out:
226 spin_unlock(&delayed_root->lock);
227
228 return next;
229}
230
231static void __btrfs_release_delayed_node(
232 struct btrfs_delayed_node *delayed_node,
233 int mod)
234{
235 struct btrfs_delayed_root *delayed_root;
236
237 if (!delayed_node)
238 return;
239
240 delayed_root = delayed_node->root->fs_info->delayed_root;
241
242 mutex_lock(&delayed_node->mutex);
243 if (delayed_node->count)
244 btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
245 else
246 btrfs_dequeue_delayed_node(delayed_root, delayed_node);
247 mutex_unlock(&delayed_node->mutex);
248
249 if (atomic_dec_and_test(&delayed_node->refs)) {
250 struct btrfs_root *root = delayed_node->root;
251 spin_lock(&root->inode_lock);
252 if (atomic_read(&delayed_node->refs) == 0) {
253 radix_tree_delete(&root->delayed_nodes_tree,
254 delayed_node->inode_id);
255 kmem_cache_free(delayed_node_cache, delayed_node);
256 }
257 spin_unlock(&root->inode_lock);
258 }
259}
260
261static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
262{
263 __btrfs_release_delayed_node(node, 0);
264}
265
266struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
267 struct btrfs_delayed_root *delayed_root)
268{
269 struct list_head *p;
270 struct btrfs_delayed_node *node = NULL;
271
272 spin_lock(&delayed_root->lock);
273 if (list_empty(&delayed_root->prepare_list))
274 goto out;
275
276 p = delayed_root->prepare_list.next;
277 list_del_init(p);
278 node = list_entry(p, struct btrfs_delayed_node, p_list);
279 atomic_inc(&node->refs);
280out:
281 spin_unlock(&delayed_root->lock);
282
283 return node;
284}
285
286static inline void btrfs_release_prepared_delayed_node(
287 struct btrfs_delayed_node *node)
288{
289 __btrfs_release_delayed_node(node, 1);
290}
291
292struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
293{
294 struct btrfs_delayed_item *item;
295 item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
296 if (item) {
297 item->data_len = data_len;
298 item->ins_or_del = 0;
299 item->bytes_reserved = 0;
300 item->block_rsv = NULL;
301 item->delayed_node = NULL;
302 atomic_set(&item->refs, 1);
303 }
304 return item;
305}
306
307/*
308 * __btrfs_lookup_delayed_item - look up the delayed item by key
309 * @delayed_node: pointer to the delayed node
310 * @key: the key to look up
311 * @prev: used to store the prev item if the right item isn't found
312 * @next: used to store the next item if the right item isn't found
313 *
314 * Note: if we don't find the right item, we will return the prev item and
315 * the next item.
316 */
317static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
318 struct rb_root *root,
319 struct btrfs_key *key,
320 struct btrfs_delayed_item **prev,
321 struct btrfs_delayed_item **next)
322{
323 struct rb_node *node, *prev_node = NULL;
324 struct btrfs_delayed_item *delayed_item = NULL;
325 int ret = 0;
326
327 node = root->rb_node;
328
329 while (node) {
330 delayed_item = rb_entry(node, struct btrfs_delayed_item,
331 rb_node);
332 prev_node = node;
333 ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
334 if (ret < 0)
335 node = node->rb_right;
336 else if (ret > 0)
337 node = node->rb_left;
338 else
339 return delayed_item;
340 }
341
342 if (prev) {
343 if (!prev_node)
344 *prev = NULL;
345 else if (ret < 0)
346 *prev = delayed_item;
347 else if ((node = rb_prev(prev_node)) != NULL) {
348 *prev = rb_entry(node, struct btrfs_delayed_item,
349 rb_node);
350 } else
351 *prev = NULL;
352 }
353
354 if (next) {
355 if (!prev_node)
356 *next = NULL;
357 else if (ret > 0)
358 *next = delayed_item;
359 else if ((node = rb_next(prev_node)) != NULL) {
360 *next = rb_entry(node, struct btrfs_delayed_item,
361 rb_node);
362 } else
363 *next = NULL;
364 }
365 return NULL;
366}
367
368struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
369 struct btrfs_delayed_node *delayed_node,
370 struct btrfs_key *key)
371{
372 struct btrfs_delayed_item *item;
373
374 item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
375 NULL, NULL);
376 return item;
377}
378
379struct btrfs_delayed_item *__btrfs_lookup_delayed_deletion_item(
380 struct btrfs_delayed_node *delayed_node,
381 struct btrfs_key *key)
382{
383 struct btrfs_delayed_item *item;
384
385 item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
386 NULL, NULL);
387 return item;
388}
389
390struct btrfs_delayed_item *__btrfs_search_delayed_insertion_item(
391 struct btrfs_delayed_node *delayed_node,
392 struct btrfs_key *key)
393{
394 struct btrfs_delayed_item *item, *next;
395
396 item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
397 NULL, &next);
398 if (!item)
399 item = next;
400
401 return item;
402}
403
404struct btrfs_delayed_item *__btrfs_search_delayed_deletion_item(
405 struct btrfs_delayed_node *delayed_node,
406 struct btrfs_key *key)
407{
408 struct btrfs_delayed_item *item, *next;
409
410 item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
411 NULL, &next);
412 if (!item)
413 item = next;
414
415 return item;
416}
417
418static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
419 struct btrfs_delayed_item *ins,
420 int action)
421{
422 struct rb_node **p, *node;
423 struct rb_node *parent_node = NULL;
424 struct rb_root *root;
425 struct btrfs_delayed_item *item;
426 int cmp;
427
428 if (action == BTRFS_DELAYED_INSERTION_ITEM)
429 root = &delayed_node->ins_root;
430 else if (action == BTRFS_DELAYED_DELETION_ITEM)
431 root = &delayed_node->del_root;
432 else
433 BUG();
434 p = &root->rb_node;
435 node = &ins->rb_node;
436
437 while (*p) {
438 parent_node = *p;
439 item = rb_entry(parent_node, struct btrfs_delayed_item,
440 rb_node);
441
442 cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
443 if (cmp < 0)
444 p = &(*p)->rb_right;
445 else if (cmp > 0)
446 p = &(*p)->rb_left;
447 else
448 return -EEXIST;
449 }
450
451 rb_link_node(node, parent_node, p);
452 rb_insert_color(node, root);
453 ins->delayed_node = delayed_node;
454 ins->ins_or_del = action;
455
456 if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
457 action == BTRFS_DELAYED_INSERTION_ITEM &&
458 ins->key.offset >= delayed_node->index_cnt)
459 delayed_node->index_cnt = ins->key.offset + 1;
460
461 delayed_node->count++;
462 atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
463 return 0;
464}
465
466static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
467 struct btrfs_delayed_item *item)
468{
469 return __btrfs_add_delayed_item(node, item,
470 BTRFS_DELAYED_INSERTION_ITEM);
471}
472
473static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
474 struct btrfs_delayed_item *item)
475{
476 return __btrfs_add_delayed_item(node, item,
477 BTRFS_DELAYED_DELETION_ITEM);
478}
479
480static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
481{
482 struct rb_root *root;
483 struct btrfs_delayed_root *delayed_root;
484
485 delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
486
487 BUG_ON(!delayed_root);
488 BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
489 delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);
490
491 if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
492 root = &delayed_item->delayed_node->ins_root;
493 else
494 root = &delayed_item->delayed_node->del_root;
495
496 rb_erase(&delayed_item->rb_node, root);
497 delayed_item->delayed_node->count--;
498 atomic_dec(&delayed_root->items);
499 if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND &&
500 waitqueue_active(&delayed_root->wait))
501 wake_up(&delayed_root->wait);
502}
503
504static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
505{
506 if (item) {
507 __btrfs_remove_delayed_item(item);
508 if (atomic_dec_and_test(&item->refs))
509 kfree(item);
510 }
511}
512
513struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
514 struct btrfs_delayed_node *delayed_node)
515{
516 struct rb_node *p;
517 struct btrfs_delayed_item *item = NULL;
518
519 p = rb_first(&delayed_node->ins_root);
520 if (p)
521 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
522
523 return item;
524}
525
526struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
527 struct btrfs_delayed_node *delayed_node)
528{
529 struct rb_node *p;
530 struct btrfs_delayed_item *item = NULL;
531
532 p = rb_first(&delayed_node->del_root);
533 if (p)
534 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
535
536 return item;
537}
538
539struct btrfs_delayed_item *__btrfs_next_delayed_item(
540 struct btrfs_delayed_item *item)
541{
542 struct rb_node *p;
543 struct btrfs_delayed_item *next = NULL;
544
545 p = rb_next(&item->rb_node);
546 if (p)
547 next = rb_entry(p, struct btrfs_delayed_item, rb_node);
548
549 return next;
550}
551
552static inline struct btrfs_delayed_node *btrfs_get_delayed_node(
553 struct inode *inode)
554{
555 struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
556 struct btrfs_delayed_node *delayed_node;
557
558 delayed_node = btrfs_inode->delayed_node;
559 if (delayed_node)
560 atomic_inc(&delayed_node->refs);
561
562 return delayed_node;
563}
564
565static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root,
566 u64 root_id)
567{
568 struct btrfs_key root_key;
569
570 if (root->objectid == root_id)
571 return root;
572
573 root_key.objectid = root_id;
574 root_key.type = BTRFS_ROOT_ITEM_KEY;
575 root_key.offset = (u64)-1;
576 return btrfs_read_fs_root_no_name(root->fs_info, &root_key);
577}
578
579static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
580 struct btrfs_root *root,
581 struct btrfs_delayed_item *item)
582{
583 struct btrfs_block_rsv *src_rsv;
584 struct btrfs_block_rsv *dst_rsv;
585 u64 num_bytes;
586 int ret;
587
588 if (!trans->bytes_reserved)
589 return 0;
590
591 src_rsv = trans->block_rsv;
592 dst_rsv = &root->fs_info->global_block_rsv;
593
594 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
595 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
596 if (!ret) {
597 item->bytes_reserved = num_bytes;
598 item->block_rsv = dst_rsv;
599 }
600
601 return ret;
602}
603
604static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
605 struct btrfs_delayed_item *item)
606{
607 if (!item->bytes_reserved)
608 return;
609
610 btrfs_block_rsv_release(root, item->block_rsv,
611 item->bytes_reserved);
612}
613
614static int btrfs_delayed_inode_reserve_metadata(
615 struct btrfs_trans_handle *trans,
616 struct btrfs_root *root,
617 struct btrfs_delayed_node *node)
618{
619 struct btrfs_block_rsv *src_rsv;
620 struct btrfs_block_rsv *dst_rsv;
621 u64 num_bytes;
622 int ret;
623
624 if (!trans->bytes_reserved)
625 return 0;
626
627 src_rsv = trans->block_rsv;
628 dst_rsv = &root->fs_info->global_block_rsv;
629
630 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
631 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
632 if (!ret)
633 node->bytes_reserved = num_bytes;
634
635 return ret;
636}
637
638static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root,
639 struct btrfs_delayed_node *node)
640{
641 struct btrfs_block_rsv *rsv;
642
643 if (!node->bytes_reserved)
644 return;
645
646 rsv = &root->fs_info->global_block_rsv;
647 btrfs_block_rsv_release(root, rsv,
648 node->bytes_reserved);
649 node->bytes_reserved = 0;
650}
651
652/*
653 * This helper will insert some continuous items into the same leaf according
654 * to the free space of the leaf.
655 */
656static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
657 struct btrfs_root *root,
658 struct btrfs_path *path,
659 struct btrfs_delayed_item *item)
660{
661 struct btrfs_delayed_item *curr, *next;
662 int free_space;
663 int total_data_size = 0, total_size = 0;
664 struct extent_buffer *leaf;
665 char *data_ptr;
666 struct btrfs_key *keys;
667 u32 *data_size;
668 struct list_head head;
669 int slot;
670 int nitems;
671 int i;
672 int ret = 0;
673
674 BUG_ON(!path->nodes[0]);
675
676 leaf = path->nodes[0];
677 free_space = btrfs_leaf_free_space(root, leaf);
678 INIT_LIST_HEAD(&head);
679
680 next = item;
681
682 /*
683 * count the number of the continuous items that we can insert in batch
684 */
685 while (total_size + next->data_len + sizeof(struct btrfs_item) <=
686 free_space) {
687 total_data_size += next->data_len;
688 total_size += next->data_len + sizeof(struct btrfs_item);
689 list_add_tail(&next->tree_list, &head);
690 nitems++;
691
692 curr = next;
693 next = __btrfs_next_delayed_item(curr);
694 if (!next)
695 break;
696
697 if (!btrfs_is_continuous_delayed_item(curr, next))
698 break;
699 }
700
701 if (!nitems) {
702 ret = 0;
703 goto out;
704 }
705
706 /*
707 * we need allocate some memory space, but it might cause the task
708 * to sleep, so we set all locked nodes in the path to blocking locks
709 * first.
710 */
711 btrfs_set_path_blocking(path);
712
713 keys = kmalloc(sizeof(struct btrfs_key) * nitems, GFP_NOFS);
714 if (!keys) {
715 ret = -ENOMEM;
716 goto out;
717 }
718
719 data_size = kmalloc(sizeof(u32) * nitems, GFP_NOFS);
720 if (!data_size) {
721 ret = -ENOMEM;
722 goto error;
723 }
724
725 /* get keys of all the delayed items */
726 i = 0;
727 list_for_each_entry(next, &head, tree_list) {
728 keys[i] = next->key;
729 data_size[i] = next->data_len;
730 i++;
731 }
732
733 /* reset all the locked nodes in the patch to spinning locks. */
734 btrfs_clear_path_blocking(path, NULL);
735
736 /* insert the keys of the items */
737 ret = setup_items_for_insert(trans, root, path, keys, data_size,
738 total_data_size, total_size, nitems);
739 if (ret)
740 goto error;
741
742 /* insert the dir index items */
743 slot = path->slots[0];
744 list_for_each_entry_safe(curr, next, &head, tree_list) {
745 data_ptr = btrfs_item_ptr(leaf, slot, char);
746 write_extent_buffer(leaf, &curr->data,
747 (unsigned long)data_ptr,
748 curr->data_len);
749 slot++;
750
751 btrfs_delayed_item_release_metadata(root, curr);
752
753 list_del(&curr->tree_list);
754 btrfs_release_delayed_item(curr);
755 }
756
757error:
758 kfree(data_size);
759 kfree(keys);
760out:
761 return ret;
762}
763
764/*
765 * This helper can just do simple insertion that needn't extend item for new
766 * data, such as directory name index insertion, inode insertion.
767 */
768static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
769 struct btrfs_root *root,
770 struct btrfs_path *path,
771 struct btrfs_delayed_item *delayed_item)
772{
773 struct extent_buffer *leaf;
774 struct btrfs_item *item;
775 char *ptr;
776 int ret;
777
778 ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
779 delayed_item->data_len);
780 if (ret < 0 && ret != -EEXIST)
781 return ret;
782
783 leaf = path->nodes[0];
784
785 item = btrfs_item_nr(leaf, path->slots[0]);
786 ptr = btrfs_item_ptr(leaf, path->slots[0], char);
787
788 write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
789 delayed_item->data_len);
790 btrfs_mark_buffer_dirty(leaf);
791
792 btrfs_delayed_item_release_metadata(root, delayed_item);
793 return 0;
794}
795
/*
 * we insert an item first, then if there are some continuous items, we try
 * to insert those items into the same leaf.
 *
 * The node's mutex is dropped and retaken on every pass, so new delayed
 * items may be added between passes; the loop keeps going until the
 * insertion tree is empty or an insert fails.
 */
static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
				      struct btrfs_path *path,
				      struct btrfs_root *root,
				      struct btrfs_delayed_node *node)
{
	struct btrfs_delayed_item *curr, *prev;
	int ret = 0;

do_again:
	mutex_lock(&node->mutex);
	curr = __btrfs_first_delayed_insertion_item(node);
	if (!curr)
		goto insert_end;

	ret = btrfs_insert_delayed_item(trans, root, path, curr);
	if (ret < 0) {
		btrfs_release_path(path);
		goto insert_end;
	}

	prev = curr;
	curr = __btrfs_next_delayed_item(prev);
	if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
		/* insert the continuous items into the same leaf */
		path->slots[0]++;
		btrfs_batch_insert_items(trans, root, path, curr);
	}
	/* drop the item we just wrote, then restart from the tree head */
	btrfs_release_delayed_item(prev);
	btrfs_mark_buffer_dirty(path->nodes[0]);

	btrfs_release_path(path);
	mutex_unlock(&node->mutex);
	goto do_again;

insert_end:
	mutex_unlock(&node->mutex);
	return ret;
}
838
/*
 * Delete a run of items from the leaf at path->nodes[0] in one call.
 *
 * Starting at path->slots[0], count how many consecutive leaf keys match
 * the chain of continuous delayed deletion items beginning at @item, then
 * remove that many items with btrfs_del_items() and release the matched
 * delayed items.  Caller holds the delayed node's mutex.
 */
static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root,
				    struct btrfs_path *path,
				    struct btrfs_delayed_item *item)
{
	struct btrfs_delayed_item *curr, *next;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct list_head head;
	int nitems, i, last_item;
	int ret = 0;

	BUG_ON(!path->nodes[0]);

	leaf = path->nodes[0];

	i = path->slots[0];
	last_item = btrfs_header_nritems(leaf) - 1;
	if (i > last_item)
		return -ENOENT;	/* FIXME: Is errno suitable? */

	next = item;
	INIT_LIST_HEAD(&head);
	btrfs_item_key_to_cpu(leaf, &key, i);
	nitems = 0;
	/*
	 * count the number of the dir index items that we can delete in batch
	 */
	while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
		list_add_tail(&next->tree_list, &head);
		nitems++;

		curr = next;
		next = __btrfs_next_delayed_item(curr);
		if (!next)
			break;

		if (!btrfs_is_continuous_delayed_item(curr, next))
			break;

		i++;
		if (i > last_item)
			break;	/* ran off the end of this leaf */
		btrfs_item_key_to_cpu(leaf, &key, i);
	}

	/* first delayed key didn't match the leaf slot: nothing to delete */
	if (!nitems)
		return 0;

	ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
	if (ret)
		goto out;

	/* items are gone from the tree; drop reservations and references */
	list_for_each_entry_safe(curr, next, &head, tree_list) {
		btrfs_delayed_item_release_metadata(root, curr);
		list_del(&curr->tree_list);
		btrfs_release_delayed_item(curr);
	}

out:
	return ret;
}
901
/*
 * Apply all delayed deletion items of @node to the tree, batching
 * adjacent ones per leaf.  A delayed item whose key no longer exists in
 * the tree is silently dropped.  The node's mutex is dropped and retaken
 * on every pass.
 */
static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
				      struct btrfs_path *path,
				      struct btrfs_root *root,
				      struct btrfs_delayed_node *node)
{
	struct btrfs_delayed_item *curr, *prev;
	int ret = 0;

do_again:
	mutex_lock(&node->mutex);
	curr = __btrfs_first_delayed_deletion_item(node);
	if (!curr)
		goto delete_fail;

	ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
	if (ret < 0)
		goto delete_fail;
	else if (ret > 0) {
		/*
		 * can't find the item which the node points to, so this node
		 * is invalid, just drop it.
		 */
		prev = curr;
		curr = __btrfs_next_delayed_item(prev);
		btrfs_release_delayed_item(prev);
		ret = 0;
		btrfs_release_path(path);
		if (curr)
			goto do_again;
		else
			goto delete_fail;
	}

	btrfs_batch_delete_items(trans, root, path, curr);
	btrfs_release_path(path);
	mutex_unlock(&node->mutex);
	goto do_again;

delete_fail:
	btrfs_release_path(path);
	mutex_unlock(&node->mutex);
	return ret;
}
945
946static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
947{
948 struct btrfs_delayed_root *delayed_root;
949
950 if (delayed_node && delayed_node->inode_dirty) {
951 BUG_ON(!delayed_node->root);
952 delayed_node->inode_dirty = 0;
953 delayed_node->count--;
954
955 delayed_root = delayed_node->root->fs_info->delayed_root;
956 atomic_dec(&delayed_root->items);
957 if (atomic_read(&delayed_root->items) <
958 BTRFS_DELAYED_BACKGROUND &&
959 waitqueue_active(&delayed_root->wait))
960 wake_up(&delayed_root->wait);
961 }
962}
963
/*
 * Copy the delayed node's cached inode_item into the on-tree inode item
 * and clear the node's dirty state, releasing its metadata reservation.
 *
 * Returns 0 on success (or if the node wasn't dirty), -ENOENT if the
 * inode item no longer exists in the tree, or another negative errno
 * from the lookup.
 */
static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      struct btrfs_path *path,
				      struct btrfs_delayed_node *node)
{
	struct btrfs_key key;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	int ret;

	mutex_lock(&node->mutex);
	if (!node->inode_dirty) {
		mutex_unlock(&node->mutex);
		return 0;
	}

	key.objectid = node->inode_id;
	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
	key.offset = 0;
	ret = btrfs_lookup_inode(trans, root, path, &key, 1);
	if (ret > 0) {
		/* inode item not found: release the path before bailing */
		btrfs_release_path(path);
		mutex_unlock(&node->mutex);
		return -ENOENT;
	} else if (ret < 0) {
		mutex_unlock(&node->mutex);
		return ret;
	}

	/* only the leaf needs to stay locked for the copy */
	btrfs_unlock_up_safe(path, 1);
	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
			    sizeof(struct btrfs_inode_item));
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	btrfs_delayed_inode_release_metadata(root, node);
	btrfs_release_delayed_inode(node);
	mutex_unlock(&node->mutex);

	return 0;
}
1008
/*
 * Called when committing the transaction.
 *
 * Walk every delayed node on the delayed root and flush its insertions,
 * deletions and inode update.  Note that @root is rebound to each node's
 * own root inside the loop — delayed nodes from different subvolumes can
 * be on the same list.  Stops at the first error.
 */
int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root)
{
	struct btrfs_delayed_root *delayed_root;
	struct btrfs_delayed_node *curr_node, *prev_node;
	struct btrfs_path *path;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->leave_spinning = 1;

	delayed_root = btrfs_get_delayed_root(root);

	curr_node = btrfs_first_delayed_node(delayed_root);
	while (curr_node) {
		/* each delayed node carries the root it belongs to */
		root = curr_node->root;
		ret = btrfs_insert_delayed_items(trans, path, root,
						 curr_node);
		if (!ret)
			ret = btrfs_delete_delayed_items(trans, path, root,
							 curr_node);
		if (!ret)
			ret = btrfs_update_delayed_inode(trans, root, path,
							 curr_node);
		if (ret) {
			btrfs_release_delayed_node(curr_node);
			break;
		}

		/* grab the next node before dropping our ref on this one */
		prev_node = curr_node;
		curr_node = btrfs_next_delayed_node(curr_node);
		btrfs_release_delayed_node(prev_node);
	}

	btrfs_free_path(path);
	return ret;
}
1049
1050static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1051 struct btrfs_delayed_node *node)
1052{
1053 struct btrfs_path *path;
1054 int ret;
1055
1056 path = btrfs_alloc_path();
1057 if (!path)
1058 return -ENOMEM;
1059 path->leave_spinning = 1;
1060
1061 ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1062 if (!ret)
1063 ret = btrfs_delete_delayed_items(trans, path, node->root, node);
1064 if (!ret)
1065 ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1066 btrfs_free_path(path);
1067
1068 return ret;
1069}
1070
1071int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1072 struct inode *inode)
1073{
1074 struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1075 int ret;
1076
1077 if (!delayed_node)
1078 return 0;
1079
1080 mutex_lock(&delayed_node->mutex);
1081 if (!delayed_node->count) {
1082 mutex_unlock(&delayed_node->mutex);
1083 btrfs_release_delayed_node(delayed_node);
1084 return 0;
1085 }
1086 mutex_unlock(&delayed_node->mutex);
1087
1088 ret = __btrfs_commit_inode_delayed_items(trans, delayed_node);
1089 btrfs_release_delayed_node(delayed_node);
1090 return ret;
1091}
1092
1093void btrfs_remove_delayed_node(struct inode *inode)
1094{
1095 struct btrfs_delayed_node *delayed_node;
1096
1097 delayed_node = ACCESS_ONCE(BTRFS_I(inode)->delayed_node);
1098 if (!delayed_node)
1099 return;
1100
1101 BTRFS_I(inode)->delayed_node = NULL;
1102 btrfs_release_delayed_node(delayed_node);
1103}
1104
/*
 * Work item used by the async worker to flush one delayed node; queued
 * on fs_info->delayed_workers by btrfs_wq_run_delayed_node().
 */
struct btrfs_async_delayed_node {
	struct btrfs_root *root;
	struct btrfs_delayed_node *delayed_node;
	struct btrfs_work work;
};
1110
/*
 * Worker callback: flush one delayed node (insertions, deletions, inode
 * update) inside a joined transaction, then either requeue the work if
 * new items arrived meanwhile or dequeue the node and free the work item.
 */
static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
{
	struct btrfs_async_delayed_node *async_node;
	struct btrfs_trans_handle *trans;
	struct btrfs_path *path;
	struct btrfs_delayed_node *delayed_node = NULL;
	struct btrfs_root *root;
	unsigned long nr = 0;
	int need_requeue = 0;
	int ret;

	async_node = container_of(work, struct btrfs_async_delayed_node, work);

	path = btrfs_alloc_path();
	if (!path)
		goto out;
	path->leave_spinning = 1;

	delayed_node = async_node->delayed_node;
	root = delayed_node->root;

	trans = btrfs_join_transaction(root, 0);
	if (IS_ERR(trans))
		goto free_path;

	ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
	if (!ret)
		ret = btrfs_delete_delayed_items(trans, path, root,
						 delayed_node);

	if (!ret)
		btrfs_update_delayed_inode(trans, root, path, delayed_node);

	/*
	 * Maybe new delayed items have been inserted, so we need requeue
	 * the work. Besides that, we must dequeue the empty delayed nodes
	 * to avoid the race between delayed items balance and the worker.
	 * The race like this:
	 * 	Task1				Worker thread
	 * 					count == 0, needn't requeue
	 * 					  also needn't insert the
	 * 					  delayed node into prepare
	 * 					  list again.
	 * 	add lots of delayed items
	 * 	queue the delayed node
	 * 	  already in the list,
	 * 	  and not in the prepare
	 * 	  list, it means the delayed
	 * 	  node is being dealt with
	 * 	  by the worker.
	 * 	do delayed items balance
	 * 	  the delayed node is being
	 * 	  dealt with by the worker
	 * 	  now, just wait.
	 * 	the worker goto idle.
	 * Task1 will sleep until the transaction is commited.
	 */
	mutex_lock(&delayed_node->mutex);
	if (delayed_node->count)
		need_requeue = 1;
	else
		btrfs_dequeue_delayed_node(root->fs_info->delayed_root,
					   delayed_node);
	mutex_unlock(&delayed_node->mutex);

	nr = trans->blocks_used;

	btrfs_end_transaction_dmeta(trans, root);
	__btrfs_btree_balance_dirty(root, nr);
free_path:
	btrfs_free_path(path);
out:
	if (need_requeue)
		btrfs_requeue_work(&async_node->work);
	else {
		/* node is fully flushed; drop its prepared ref and the work */
		btrfs_release_prepared_delayed_node(delayed_node);
		kfree(async_node);
	}
}
1190
/*
 * Pull prepared delayed nodes off the delayed root and hand each to an
 * async worker.  At most 4 nodes are queued per call unless @all is set,
 * in which case the loop drains the whole prepare list.
 */
static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
				     struct btrfs_root *root, int all)
{
	struct btrfs_async_delayed_node *async_node;
	struct btrfs_delayed_node *curr;
	int count = 0;

again:
	curr = btrfs_first_prepared_delayed_node(delayed_root);
	if (!curr)
		return 0;

	async_node = kmalloc(sizeof(*async_node), GFP_NOFS);
	if (!async_node) {
		/* give back the reference taken by first_prepared */
		btrfs_release_prepared_delayed_node(curr);
		return -ENOMEM;
	}

	async_node->root = root;
	async_node->delayed_node = curr;

	async_node->work.func = btrfs_async_run_delayed_node_done;
	async_node->work.flags = 0;

	btrfs_queue_worker(&root->fs_info->delayed_workers, &async_node->work);
	count++;

	if (all || count < 4)
		goto again;

	return 0;
}
1223
1224void btrfs_balance_delayed_items(struct btrfs_root *root)
1225{
1226 struct btrfs_delayed_root *delayed_root;
1227
1228 delayed_root = btrfs_get_delayed_root(root);
1229
1230 if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1231 return;
1232
1233 if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
1234 int ret;
1235 ret = btrfs_wq_run_delayed_node(delayed_root, root, 1);
1236 if (ret)
1237 return;
1238
1239 wait_event_interruptible_timeout(
1240 delayed_root->wait,
1241 (atomic_read(&delayed_root->items) <
1242 BTRFS_DELAYED_BACKGROUND),
1243 HZ);
1244 return;
1245 }
1246
1247 btrfs_wq_run_delayed_node(delayed_root, root, 0);
1248}
1249
/*
 * Queue a delayed insertion of a DIR_INDEX item for @dir instead of
 * touching the tree immediately.  The dir_item payload (location key,
 * transid, type and name) is built inline in the delayed item's data.
 *
 * Returns 0 on success or a negative errno; an allocation failure for
 * the delayed item is the only expected failure.
 */
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root, const char *name,
				   int name_len, struct inode *dir,
				   struct btrfs_disk_key *disk_key, u8 type,
				   u64 index)
{
	struct btrfs_delayed_node *delayed_node;
	struct btrfs_delayed_item *delayed_item;
	struct btrfs_dir_item *dir_item;
	int ret;

	delayed_node = btrfs_get_or_create_delayed_node(dir);
	if (IS_ERR(delayed_node))
		return PTR_ERR(delayed_node);

	/* item data holds the dir_item header plus the name bytes */
	delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
	if (!delayed_item) {
		ret = -ENOMEM;
		goto release_node;
	}

	ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item);
	/*
	 * we have reserved enough space when we start a new transaction,
	 * so reserving metadata failure is impossible
	 */
	BUG_ON(ret);

	delayed_item->key.objectid = btrfs_ino(dir);
	btrfs_set_key_type(&delayed_item->key, BTRFS_DIR_INDEX_KEY);
	delayed_item->key.offset = index;

	dir_item = (struct btrfs_dir_item *)delayed_item->data;
	dir_item->location = *disk_key;
	dir_item->transid = cpu_to_le64(trans->transid);
	dir_item->data_len = 0;
	dir_item->name_len = cpu_to_le16(name_len);
	dir_item->type = type;
	/* name is stored right after the dir_item header */
	memcpy((char *)(dir_item + 1), name, name_len);

	mutex_lock(&delayed_node->mutex);
	ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
	if (unlikely(ret)) {
		printk(KERN_ERR "err add delayed dir index item(name: %s) into "
				"the insertion tree of the delayed node"
				"(root id: %llu, inode id: %llu, errno: %d)\n",
				name,
				(unsigned long long)delayed_node->root->objectid,
				(unsigned long long)delayed_node->inode_id,
				ret);
		BUG();
	}
	mutex_unlock(&delayed_node->mutex);

release_node:
	btrfs_release_delayed_node(delayed_node);
	return ret;
}
1308
1309static int btrfs_delete_delayed_insertion_item(struct btrfs_root *root,
1310 struct btrfs_delayed_node *node,
1311 struct btrfs_key *key)
1312{
1313 struct btrfs_delayed_item *item;
1314
1315 mutex_lock(&node->mutex);
1316 item = __btrfs_lookup_delayed_insertion_item(node, key);
1317 if (!item) {
1318 mutex_unlock(&node->mutex);
1319 return 1;
1320 }
1321
1322 btrfs_delayed_item_release_metadata(root, item);
1323 btrfs_release_delayed_item(item);
1324 mutex_unlock(&node->mutex);
1325 return 0;
1326}
1327
/*
 * Queue a delayed deletion of the DIR_INDEX item @index of @dir.  If a
 * delayed insertion with the same key is still pending, the two cancel
 * out and nothing is queued.  Returns 0 on success or a negative errno.
 */
int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root, struct inode *dir,
				   u64 index)
{
	struct btrfs_delayed_node *node;
	struct btrfs_delayed_item *item;
	struct btrfs_key item_key;
	int ret;

	node = btrfs_get_or_create_delayed_node(dir);
	if (IS_ERR(node))
		return PTR_ERR(node);

	item_key.objectid = btrfs_ino(dir);
	btrfs_set_key_type(&item_key, BTRFS_DIR_INDEX_KEY);
	item_key.offset = index;

	/* a matching pending insertion cancels the deletion outright */
	ret = btrfs_delete_delayed_insertion_item(root, node, &item_key);
	if (!ret)
		goto end;

	/* deletion items carry no payload, only the key */
	item = btrfs_alloc_delayed_item(0);
	if (!item) {
		ret = -ENOMEM;
		goto end;
	}

	item->key = item_key;

	ret = btrfs_delayed_item_reserve_metadata(trans, root, item);
	/*
	 * we have reserved enough space when we start a new transaction,
	 * so reserving metadata failure is impossible.
	 */
	BUG_ON(ret);

	mutex_lock(&node->mutex);
	ret = __btrfs_add_delayed_deletion_item(node, item);
	if (unlikely(ret)) {
		printk(KERN_ERR "err add delayed dir index item(index: %llu) "
				"into the deletion tree of the delayed node"
				"(root id: %llu, inode id: %llu, errno: %d)\n",
				(unsigned long long)index,
				(unsigned long long)node->root->objectid,
				(unsigned long long)node->inode_id,
				ret);
		BUG();
	}
	mutex_unlock(&node->mutex);
end:
	btrfs_release_delayed_node(node);
	return ret;
}
1381
1382int btrfs_inode_delayed_dir_index_count(struct inode *inode)
1383{
1384 struct btrfs_delayed_node *delayed_node = BTRFS_I(inode)->delayed_node;
1385 int ret = 0;
1386
1387 if (!delayed_node)
1388 return -ENOENT;
1389
1390 /*
1391 * Since we have held i_mutex of this directory, it is impossible that
1392 * a new directory index is added into the delayed node and index_cnt
1393 * is updated now. So we needn't lock the delayed node.
1394 */
1395 if (!delayed_node->index_cnt)
1396 return -EINVAL;
1397
1398 BTRFS_I(inode)->index_cnt = delayed_node->index_cnt;
1399 return ret;
1400}
1401
/*
 * Collect extra references to all pending insertion and deletion items
 * of @inode's delayed node onto @ins_list and @del_list for readdir.
 * The lists must later be drained with btrfs_put_delayed_items().
 */
void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
			     struct list_head *del_list)
{
	struct btrfs_delayed_node *delayed_node;
	struct btrfs_delayed_item *item;

	delayed_node = btrfs_get_delayed_node(inode);
	if (!delayed_node)
		return;

	mutex_lock(&delayed_node->mutex);
	item = __btrfs_first_delayed_insertion_item(delayed_node);
	while (item) {
		/* pin each item so it survives until readdir is done */
		atomic_inc(&item->refs);
		list_add_tail(&item->readdir_list, ins_list);
		item = __btrfs_next_delayed_item(item);
	}

	item = __btrfs_first_delayed_deletion_item(delayed_node);
	while (item) {
		atomic_inc(&item->refs);
		list_add_tail(&item->readdir_list, del_list);
		item = __btrfs_next_delayed_item(item);
	}
	mutex_unlock(&delayed_node->mutex);
	/*
	 * This delayed node is still cached in the btrfs inode, so refs
	 * must be > 1 now, and we needn't check it is going to be freed
	 * or not.
	 *
	 * Besides that, this function is used to read dir, we do not
	 * insert/delete delayed items in this period. So we also needn't
	 * requeue or dequeue this delayed node.
	 */
	atomic_dec(&delayed_node->refs);
}
1438
1439void btrfs_put_delayed_items(struct list_head *ins_list,
1440 struct list_head *del_list)
1441{
1442 struct btrfs_delayed_item *curr, *next;
1443
1444 list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1445 list_del(&curr->readdir_list);
1446 if (atomic_dec_and_test(&curr->refs))
1447 kfree(curr);
1448 }
1449
1450 list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1451 list_del(&curr->readdir_list);
1452 if (atomic_dec_and_test(&curr->refs))
1453 kfree(curr);
1454 }
1455}
1456
1457int btrfs_should_delete_dir_index(struct list_head *del_list,
1458 u64 index)
1459{
1460 struct btrfs_delayed_item *curr, *next;
1461 int ret;
1462
1463 if (list_empty(del_list))
1464 return 0;
1465
1466 list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1467 if (curr->key.offset > index)
1468 break;
1469
1470 list_del(&curr->readdir_list);
1471 ret = (curr->key.offset == index);
1472
1473 if (atomic_dec_and_test(&curr->refs))
1474 kfree(curr);
1475
1476 if (ret)
1477 return 1;
1478 else
1479 continue;
1480 }
1481 return 0;
1482}
1483
/*
 * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
 *
 * Emits each pending delayed insertion on @ins_list through @filldir,
 * advancing filp->f_pos and dropping the per-item reference as entries
 * are consumed.  Returns 1 when filldir signals its buffer is full,
 * 0 when the list is exhausted.
 */
int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
				    filldir_t filldir,
				    struct list_head *ins_list)
{
	struct btrfs_dir_item *di;
	struct btrfs_delayed_item *curr, *next;
	struct btrfs_key location;
	char *name;
	int name_len;
	int over = 0;
	unsigned char d_type;

	if (list_empty(ins_list))
		return 0;

	/*
	 * Changing the data of the delayed item is impossible. So
	 * we needn't lock them. And we have held i_mutex of the
	 * directory, nobody can delete any directory indexes now.
	 */
	list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
		list_del(&curr->readdir_list);

		/* skip entries before the current readdir position */
		if (curr->key.offset < filp->f_pos) {
			if (atomic_dec_and_test(&curr->refs))
				kfree(curr);
			continue;
		}

		filp->f_pos = curr->key.offset;

		/* dir_item header and name are stored inline in the data */
		di = (struct btrfs_dir_item *)curr->data;
		name = (char *)(di + 1);
		name_len = le16_to_cpu(di->name_len);

		d_type = btrfs_filetype_table[di->type];
		btrfs_disk_key_to_cpu(&location, &di->location);

		over = filldir(dirent, name, name_len, curr->key.offset,
			       location.objectid, d_type);

		if (atomic_dec_and_test(&curr->refs))
			kfree(curr);

		if (over)
			return 1;
	}
	return 0;
}
1537
/*
 * Generate endian-safe accessors (btrfs_stack_inode_* get/set) for the
 * in-memory copy of struct btrfs_inode_item and struct btrfs_timespec
 * cached in the delayed node; used by fill_stack_inode_item() below.
 */
BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item,
			 generation, 64);
BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item,
			 sequence, 64);
BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item,
			 transid, 64);
BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64);
BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item,
			 nbytes, 64);
BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item,
			 block_group, 64);
BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32);
BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32);
BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32);
BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32);
BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64);
BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64);

BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32);
1558
/*
 * Snapshot the VFS inode's fields (ids, size, mode, link count, times,
 * etc.) into the stack inode_item that the delayed node caches; this is
 * what later gets written into the tree by btrfs_update_delayed_inode().
 */
static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
				  struct btrfs_inode_item *inode_item,
				  struct inode *inode)
{
	btrfs_set_stack_inode_uid(inode_item, inode->i_uid);
	btrfs_set_stack_inode_gid(inode_item, inode->i_gid);
	btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
	btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
	btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
	btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
	btrfs_set_stack_inode_generation(inode_item,
					 BTRFS_I(inode)->generation);
	btrfs_set_stack_inode_sequence(inode_item, BTRFS_I(inode)->sequence);
	/* stamp the item with the transaction doing the update */
	btrfs_set_stack_inode_transid(inode_item, trans->transid);
	btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
	btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
	btrfs_set_stack_inode_block_group(inode_item,
					  BTRFS_I(inode)->block_group);

	btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item),
				     inode->i_atime.tv_sec);
	btrfs_set_stack_timespec_nsec(btrfs_inode_atime(inode_item),
				      inode->i_atime.tv_nsec);

	btrfs_set_stack_timespec_sec(btrfs_inode_mtime(inode_item),
				     inode->i_mtime.tv_sec);
	btrfs_set_stack_timespec_nsec(btrfs_inode_mtime(inode_item),
				      inode->i_mtime.tv_nsec);

	btrfs_set_stack_timespec_sec(btrfs_inode_ctime(inode_item),
				     inode->i_ctime.tv_sec);
	btrfs_set_stack_timespec_nsec(btrfs_inode_ctime(inode_item),
				      inode->i_ctime.tv_nsec);
}
1593
/*
 * Record an inode update in the delayed node instead of writing the
 * inode item immediately.  If the node is already dirty the cached item
 * is just refreshed; otherwise metadata is reserved and the node is
 * marked dirty.  Returns 0 on success or a negative errno from node
 * creation.
 */
int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root, struct inode *inode)
{
	struct btrfs_delayed_node *delayed_node;
	int ret;

	delayed_node = btrfs_get_or_create_delayed_node(inode);
	if (IS_ERR(delayed_node))
		return PTR_ERR(delayed_node);

	mutex_lock(&delayed_node->mutex);
	if (delayed_node->inode_dirty) {
		/* already dirty: just refresh the cached copy */
		fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
		goto release_node;
	}

	ret = btrfs_delayed_inode_reserve_metadata(trans, root, delayed_node);
	/*
	 * we must reserve enough space when we start a new transaction,
	 * so reserving metadata failure is impossible
	 */
	BUG_ON(ret);

	fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
	delayed_node->inode_dirty = 1;
	delayed_node->count++;
	atomic_inc(&root->fs_info->delayed_root->items);
release_node:
	mutex_unlock(&delayed_node->mutex);
	btrfs_release_delayed_node(delayed_node);
	return ret;
}
1626
1627static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1628{
1629 struct btrfs_root *root = delayed_node->root;
1630 struct btrfs_delayed_item *curr_item, *prev_item;
1631
1632 mutex_lock(&delayed_node->mutex);
1633 curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
1634 while (curr_item) {
1635 btrfs_delayed_item_release_metadata(root, curr_item);
1636 prev_item = curr_item;
1637 curr_item = __btrfs_next_delayed_item(prev_item);
1638 btrfs_release_delayed_item(prev_item);
1639 }
1640
1641 curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
1642 while (curr_item) {
1643 btrfs_delayed_item_release_metadata(root, curr_item);
1644 prev_item = curr_item;
1645 curr_item = __btrfs_next_delayed_item(prev_item);
1646 btrfs_release_delayed_item(prev_item);
1647 }
1648
1649 if (delayed_node->inode_dirty) {
1650 btrfs_delayed_inode_release_metadata(root, delayed_node);
1651 btrfs_release_delayed_inode(delayed_node);
1652 }
1653 mutex_unlock(&delayed_node->mutex);
1654}
1655
/* Discard all delayed items of @inode, if it has a delayed node. */
void btrfs_kill_delayed_inode_items(struct inode *inode)
{
	struct btrfs_delayed_node *node = btrfs_get_delayed_node(inode);

	if (!node)
		return;

	__btrfs_kill_delayed_node(node);
	btrfs_release_delayed_node(node);
}
1667
/*
 * Discard the delayed items of every delayed node of @root.  Nodes are
 * gathered from the radix tree in batches of 8 under inode_lock with an
 * extra reference each, then killed and released outside the lock.
 */
void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
{
	u64 inode_id = 0;
	struct btrfs_delayed_node *delayed_nodes[8];
	int i, n;

	while (1) {
		spin_lock(&root->inode_lock);
		n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
					   (void **)delayed_nodes, inode_id,
					   ARRAY_SIZE(delayed_nodes));
		if (!n) {
			spin_unlock(&root->inode_lock);
			break;
		}

		/* resume the next lookup after the last node we saw */
		inode_id = delayed_nodes[n - 1]->inode_id + 1;

		for (i = 0; i < n; i++)
			atomic_inc(&delayed_nodes[i]->refs);
		spin_unlock(&root->inode_lock);

		for (i = 0; i < n; i++) {
			__btrfs_kill_delayed_node(delayed_nodes[i]);
			btrfs_release_delayed_node(delayed_nodes[i]);
		}
	}
}
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
new file mode 100644
index 00000000000..eb7d240aa64
--- /dev/null
+++ b/fs/btrfs/delayed-inode.h
@@ -0,0 +1,141 @@
1/*
2 * Copyright (C) 2011 Fujitsu. All rights reserved.
3 * Written by Miao Xie <miaox@cn.fujitsu.com>
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public
7 * License v2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public
15 * License along with this program; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 021110-1307, USA.
18 */
19
20#ifndef __DELAYED_TREE_OPERATION_H
21#define __DELAYED_TREE_OPERATION_H
22
23#include <linux/rbtree.h>
24#include <linux/spinlock.h>
25#include <linux/mutex.h>
26#include <linux/list.h>
27#include <linux/wait.h>
28#include <asm/atomic.h>
29
30#include "ctree.h"
31
32/* types of the delayed item */
33#define BTRFS_DELAYED_INSERTION_ITEM 1
34#define BTRFS_DELAYED_DELETION_ITEM 2
35
/*
 * Per-filesystem anchor for the delayed-inode machinery (allocated into
 * fs_info->delayed_root at mount time).
 */
struct btrfs_delayed_root {
	spinlock_t lock;
	/* Delayed nodes, linked via btrfs_delayed_node::n_list. */
	struct list_head node_list;
	/*
	 * Used for delayed nodes which are waiting to be dealt with by the
	 * worker. If the delayed node is inserted into the work queue, we
	 * drop it from this list.
	 */
	struct list_head prepare_list;
	atomic_t items; /* for delayed items */
	int nodes; /* for delayed nodes */
	/* NOTE(review): presumably where item producers are throttled — confirm against users. */
	wait_queue_head_t wait;
};
49
/*
 * Per-inode collection of pending (delayed) metadata operations, looked up
 * by inode number in the owning root's delayed_nodes_tree radix tree.
 */
struct btrfs_delayed_node {
	u64 inode_id;		/* radix tree key in root->delayed_nodes_tree */
	u64 bytes_reserved;	/* metadata space reserved for the inode update */
	struct btrfs_root *root;
	/* Used to add the node into the delayed root's node list. */
	struct list_head n_list;
	/*
	 * Used to add the node into the prepare list, the nodes in this list
	 * are waiting to be dealt with by the async worker.
	 */
	struct list_head p_list;
	struct rb_root ins_root;	/* pending insertion items */
	struct rb_root del_root;	/* pending deletion items */
	/* Held while the item trees and inode_item are consumed or torn down. */
	struct mutex mutex;
	struct btrfs_inode_item inode_item;
	atomic_t refs;		/* node refcount; freed when it drops to zero */
	/* NOTE(review): presumably the next free dir index — confirm against btrfs_inode_delayed_dir_index_count(). */
	u64 index_cnt;
	bool in_list;		/* linked on delayed_root->node_list? */
	bool inode_dirty;	/* inode_item holds an unflushed inode update */
	/* NOTE(review): presumably the number of attached delayed items — confirm. */
	int count;
};
71
/*
 * One pending insertion or deletion of a single btree item, queued on a
 * btrfs_delayed_node.
 */
struct btrfs_delayed_item {
	struct rb_node rb_node;	/* link in the node's ins_root or del_root */
	struct btrfs_key key;	/* key of the btree item to insert/delete */
	struct list_head tree_list; /* used for batch insert/delete items */
	struct list_head readdir_list; /* used for readdir items */
	/* NOTE(review): presumably metadata space reserved from block_rsv — confirm. */
	u64 bytes_reserved;
	struct btrfs_block_rsv *block_rsv;
	struct btrfs_delayed_node *delayed_node; /* owning node */
	atomic_t refs;
	int ins_or_del;		/* BTRFS_DELAYED_INSERTION_ITEM or BTRFS_DELAYED_DELETION_ITEM */
	u32 data_len;		/* length of the trailing payload in data[] */
	char data[0];		/* item payload, allocated inline after the struct */
};
85
86static inline void btrfs_init_delayed_root(
87 struct btrfs_delayed_root *delayed_root)
88{
89 atomic_set(&delayed_root->items, 0);
90 delayed_root->nodes = 0;
91 spin_lock_init(&delayed_root->lock);
92 init_waitqueue_head(&delayed_root->wait);
93 INIT_LIST_HEAD(&delayed_root->node_list);
94 INIT_LIST_HEAD(&delayed_root->prepare_list);
95}
96
/*
 * Record a dir index insertion/deletion in memory instead of updating the
 * btree immediately (replaces the second btree insert in
 * btrfs_insert_dir_item(), see dir-item.c).
 */
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root, const char *name,
				   int name_len, struct inode *dir,
				   struct btrfs_disk_key *disk_key, u8 type,
				   u64 index);

int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root, struct inode *dir,
				   u64 index);

int btrfs_inode_delayed_dir_index_count(struct inode *inode);

/* Flush queued delayed items into the btree. */
int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root);

/* Throttle producers; called from btrfs_btree_balance_dirty(). */
void btrfs_balance_delayed_items(struct btrfs_root *root);

int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
				     struct inode *inode);
/* Used for evicting the inode. */
void btrfs_remove_delayed_node(struct inode *inode);
void btrfs_kill_delayed_inode_items(struct inode *inode);


int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root, struct inode *inode);

/* Used for drop dead root */
void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);

/* Used for readdir() */
void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
			     struct list_head *del_list);
void btrfs_put_delayed_items(struct list_head *ins_list,
			     struct list_head *del_list);
int btrfs_should_delete_dir_index(struct list_head *del_list,
				  u64 index);
int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
				    filldir_t filldir,
				    struct list_head *ins_list);

/* for init */
int __init btrfs_delayed_inode_init(void);
void btrfs_delayed_inode_exit(void);
141#endif
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index ab8afed671a..1ddfca78e91 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -124,8 +124,9 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
124 * to use for the second index (if one is created). 124 * to use for the second index (if one is created).
125 */ 125 */
126int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root 126int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
127 *root, const char *name, int name_len, u64 dir, 127 *root, const char *name, int name_len,
128 struct btrfs_key *location, u8 type, u64 index) 128 struct inode *dir, struct btrfs_key *location,
129 u8 type, u64 index)
129{ 130{
130 int ret = 0; 131 int ret = 0;
131 int ret2 = 0; 132 int ret2 = 0;
@@ -137,13 +138,17 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
137 struct btrfs_disk_key disk_key; 138 struct btrfs_disk_key disk_key;
138 u32 data_size; 139 u32 data_size;
139 140
140 key.objectid = dir; 141 key.objectid = btrfs_ino(dir);
141 btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); 142 btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
142 key.offset = btrfs_name_hash(name, name_len); 143 key.offset = btrfs_name_hash(name, name_len);
143 144
144 path = btrfs_alloc_path(); 145 path = btrfs_alloc_path();
146 if (!path)
147 return -ENOMEM;
145 path->leave_spinning = 1; 148 path->leave_spinning = 1;
146 149
150 btrfs_cpu_key_to_disk(&disk_key, location);
151
147 data_size = sizeof(*dir_item) + name_len; 152 data_size = sizeof(*dir_item) + name_len;
148 dir_item = insert_with_overflow(trans, root, path, &key, data_size, 153 dir_item = insert_with_overflow(trans, root, path, &key, data_size,
149 name, name_len); 154 name, name_len);
@@ -155,7 +160,6 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
155 } 160 }
156 161
157 leaf = path->nodes[0]; 162 leaf = path->nodes[0];
158 btrfs_cpu_key_to_disk(&disk_key, location);
159 btrfs_set_dir_item_key(leaf, dir_item, &disk_key); 163 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
160 btrfs_set_dir_type(leaf, dir_item, type); 164 btrfs_set_dir_type(leaf, dir_item, type);
161 btrfs_set_dir_data_len(leaf, dir_item, 0); 165 btrfs_set_dir_data_len(leaf, dir_item, 0);
@@ -174,27 +178,9 @@ second_insert:
174 } 178 }
175 btrfs_release_path(path); 179 btrfs_release_path(path);
176 180
177 btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); 181 ret2 = btrfs_insert_delayed_dir_index(trans, root, name, name_len, dir,
178 key.offset = index; 182 &disk_key, type, index);
179 dir_item = insert_with_overflow(trans, root, path, &key, data_size,
180 name, name_len);
181 if (IS_ERR(dir_item)) {
182 ret2 = PTR_ERR(dir_item);
183 goto out_free;
184 }
185 leaf = path->nodes[0];
186 btrfs_cpu_key_to_disk(&disk_key, location);
187 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
188 btrfs_set_dir_type(leaf, dir_item, type);
189 btrfs_set_dir_data_len(leaf, dir_item, 0);
190 btrfs_set_dir_name_len(leaf, dir_item, name_len);
191 btrfs_set_dir_transid(leaf, dir_item, trans->transid);
192 name_ptr = (unsigned long)(dir_item + 1);
193 write_extent_buffer(leaf, name, name_ptr, name_len);
194 btrfs_mark_buffer_dirty(leaf);
195
196out_free: 183out_free:
197
198 btrfs_free_path(path); 184 btrfs_free_path(path);
199 if (ret) 185 if (ret)
200 return ret; 186 return ret;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index cb9d1b8bfe7..a2eb3a3755d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -42,6 +42,7 @@
42#include "locking.h" 42#include "locking.h"
43#include "tree-log.h" 43#include "tree-log.h"
44#include "free-space-cache.h" 44#include "free-space-cache.h"
45#include "inode-map.h"
45 46
46static struct extent_io_ops btree_extent_io_ops; 47static struct extent_io_ops btree_extent_io_ops;
47static void end_workqueue_fn(struct btrfs_work *work); 48static void end_workqueue_fn(struct btrfs_work *work);
@@ -1045,6 +1046,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1045 root->name = NULL; 1046 root->name = NULL;
1046 root->in_sysfs = 0; 1047 root->in_sysfs = 0;
1047 root->inode_tree = RB_ROOT; 1048 root->inode_tree = RB_ROOT;
1049 INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
1048 root->block_rsv = NULL; 1050 root->block_rsv = NULL;
1049 root->orphan_block_rsv = NULL; 1051 root->orphan_block_rsv = NULL;
1050 1052
@@ -1298,6 +1300,19 @@ again:
1298 if (IS_ERR(root)) 1300 if (IS_ERR(root))
1299 return root; 1301 return root;
1300 1302
1303 root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
1304 if (!root->free_ino_ctl)
1305 goto fail;
1306 root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
1307 GFP_NOFS);
1308 if (!root->free_ino_pinned)
1309 goto fail;
1310
1311 btrfs_init_free_ino_ctl(root);
1312 mutex_init(&root->fs_commit_mutex);
1313 spin_lock_init(&root->cache_lock);
1314 init_waitqueue_head(&root->cache_wait);
1315
1301 set_anon_super(&root->anon_super, NULL); 1316 set_anon_super(&root->anon_super, NULL);
1302 1317
1303 if (btrfs_root_refs(&root->root_item) == 0) { 1318 if (btrfs_root_refs(&root->root_item) == 0) {
@@ -1631,6 +1646,13 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1631 1646
1632 INIT_LIST_HEAD(&fs_info->ordered_extents); 1647 INIT_LIST_HEAD(&fs_info->ordered_extents);
1633 spin_lock_init(&fs_info->ordered_extent_lock); 1648 spin_lock_init(&fs_info->ordered_extent_lock);
1649 fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root),
1650 GFP_NOFS);
1651 if (!fs_info->delayed_root) {
1652 err = -ENOMEM;
1653 goto fail_iput;
1654 }
1655 btrfs_init_delayed_root(fs_info->delayed_root);
1634 1656
1635 sb->s_blocksize = 4096; 1657 sb->s_blocksize = 4096;
1636 sb->s_blocksize_bits = blksize_bits(4096); 1658 sb->s_blocksize_bits = blksize_bits(4096);
@@ -1696,7 +1718,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1696 bh = btrfs_read_dev_super(fs_devices->latest_bdev); 1718 bh = btrfs_read_dev_super(fs_devices->latest_bdev);
1697 if (!bh) { 1719 if (!bh) {
1698 err = -EINVAL; 1720 err = -EINVAL;
1699 goto fail_iput; 1721 goto fail_alloc;
1700 } 1722 }
1701 1723
1702 memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); 1724 memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy));
@@ -1708,7 +1730,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1708 1730
1709 disk_super = &fs_info->super_copy; 1731 disk_super = &fs_info->super_copy;
1710 if (!btrfs_super_root(disk_super)) 1732 if (!btrfs_super_root(disk_super))
1711 goto fail_iput; 1733 goto fail_alloc;
1712 1734
1713 /* check FS state, whether FS is broken. */ 1735 /* check FS state, whether FS is broken. */
1714 fs_info->fs_state |= btrfs_super_flags(disk_super); 1736 fs_info->fs_state |= btrfs_super_flags(disk_super);
@@ -1724,7 +1746,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1724 ret = btrfs_parse_options(tree_root, options); 1746 ret = btrfs_parse_options(tree_root, options);
1725 if (ret) { 1747 if (ret) {
1726 err = ret; 1748 err = ret;
1727 goto fail_iput; 1749 goto fail_alloc;
1728 } 1750 }
1729 1751
1730 features = btrfs_super_incompat_flags(disk_super) & 1752 features = btrfs_super_incompat_flags(disk_super) &
@@ -1734,7 +1756,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1734 "unsupported optional features (%Lx).\n", 1756 "unsupported optional features (%Lx).\n",
1735 (unsigned long long)features); 1757 (unsigned long long)features);
1736 err = -EINVAL; 1758 err = -EINVAL;
1737 goto fail_iput; 1759 goto fail_alloc;
1738 } 1760 }
1739 1761
1740 features = btrfs_super_incompat_flags(disk_super); 1762 features = btrfs_super_incompat_flags(disk_super);
@@ -1750,7 +1772,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1750 "unsupported option features (%Lx).\n", 1772 "unsupported option features (%Lx).\n",
1751 (unsigned long long)features); 1773 (unsigned long long)features);
1752 err = -EINVAL; 1774 err = -EINVAL;
1753 goto fail_iput; 1775 goto fail_alloc;
1754 } 1776 }
1755 1777
1756 btrfs_init_workers(&fs_info->generic_worker, 1778 btrfs_init_workers(&fs_info->generic_worker,
@@ -1797,6 +1819,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1797 &fs_info->generic_worker); 1819 &fs_info->generic_worker);
1798 btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write", 1820 btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write",
1799 1, &fs_info->generic_worker); 1821 1, &fs_info->generic_worker);
1822 btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta",
1823 fs_info->thread_pool_size,
1824 &fs_info->generic_worker);
1800 1825
1801 /* 1826 /*
1802 * endios are largely parallel and should have a very 1827 * endios are largely parallel and should have a very
@@ -1818,6 +1843,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1818 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); 1843 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
1819 btrfs_start_workers(&fs_info->endio_write_workers, 1); 1844 btrfs_start_workers(&fs_info->endio_write_workers, 1);
1820 btrfs_start_workers(&fs_info->endio_freespace_worker, 1); 1845 btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
1846 btrfs_start_workers(&fs_info->delayed_workers, 1);
1821 1847
1822 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); 1848 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
1823 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, 1849 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -2074,6 +2100,9 @@ fail_sb_buffer:
2074 btrfs_stop_workers(&fs_info->endio_write_workers); 2100 btrfs_stop_workers(&fs_info->endio_write_workers);
2075 btrfs_stop_workers(&fs_info->endio_freespace_worker); 2101 btrfs_stop_workers(&fs_info->endio_freespace_worker);
2076 btrfs_stop_workers(&fs_info->submit_workers); 2102 btrfs_stop_workers(&fs_info->submit_workers);
2103 btrfs_stop_workers(&fs_info->delayed_workers);
2104fail_alloc:
2105 kfree(fs_info->delayed_root);
2077fail_iput: 2106fail_iput:
2078 invalidate_inode_pages2(fs_info->btree_inode->i_mapping); 2107 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
2079 iput(fs_info->btree_inode); 2108 iput(fs_info->btree_inode);
@@ -2338,12 +2367,15 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2338 if (btrfs_root_refs(&root->root_item) == 0) 2367 if (btrfs_root_refs(&root->root_item) == 0)
2339 synchronize_srcu(&fs_info->subvol_srcu); 2368 synchronize_srcu(&fs_info->subvol_srcu);
2340 2369
2370 __btrfs_remove_free_space_cache(root->free_ino_pinned);
2371 __btrfs_remove_free_space_cache(root->free_ino_ctl);
2341 free_fs_root(root); 2372 free_fs_root(root);
2342 return 0; 2373 return 0;
2343} 2374}
2344 2375
2345static void free_fs_root(struct btrfs_root *root) 2376static void free_fs_root(struct btrfs_root *root)
2346{ 2377{
2378 iput(root->cache_inode);
2347 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); 2379 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
2348 if (root->anon_super.s_dev) { 2380 if (root->anon_super.s_dev) {
2349 down_write(&root->anon_super.s_umount); 2381 down_write(&root->anon_super.s_umount);
@@ -2351,6 +2383,8 @@ static void free_fs_root(struct btrfs_root *root)
2351 } 2383 }
2352 free_extent_buffer(root->node); 2384 free_extent_buffer(root->node);
2353 free_extent_buffer(root->commit_root); 2385 free_extent_buffer(root->commit_root);
2386 kfree(root->free_ino_ctl);
2387 kfree(root->free_ino_pinned);
2354 kfree(root->name); 2388 kfree(root->name);
2355 kfree(root); 2389 kfree(root);
2356} 2390}
@@ -2512,6 +2546,7 @@ int close_ctree(struct btrfs_root *root)
2512 del_fs_roots(fs_info); 2546 del_fs_roots(fs_info);
2513 2547
2514 iput(fs_info->btree_inode); 2548 iput(fs_info->btree_inode);
2549 kfree(fs_info->delayed_root);
2515 2550
2516 btrfs_stop_workers(&fs_info->generic_worker); 2551 btrfs_stop_workers(&fs_info->generic_worker);
2517 btrfs_stop_workers(&fs_info->fixup_workers); 2552 btrfs_stop_workers(&fs_info->fixup_workers);
@@ -2523,6 +2558,7 @@ int close_ctree(struct btrfs_root *root)
2523 btrfs_stop_workers(&fs_info->endio_write_workers); 2558 btrfs_stop_workers(&fs_info->endio_write_workers);
2524 btrfs_stop_workers(&fs_info->endio_freespace_worker); 2559 btrfs_stop_workers(&fs_info->endio_freespace_worker);
2525 btrfs_stop_workers(&fs_info->submit_workers); 2560 btrfs_stop_workers(&fs_info->submit_workers);
2561 btrfs_stop_workers(&fs_info->delayed_workers);
2526 2562
2527 btrfs_close_devices(fs_info->fs_devices); 2563 btrfs_close_devices(fs_info->fs_devices);
2528 btrfs_mapping_tree_free(&fs_info->mapping_tree); 2564 btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -2599,6 +2635,29 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
2599 if (current->flags & PF_MEMALLOC) 2635 if (current->flags & PF_MEMALLOC)
2600 return; 2636 return;
2601 2637
2638 btrfs_balance_delayed_items(root);
2639
2640 num_dirty = root->fs_info->dirty_metadata_bytes;
2641
2642 if (num_dirty > thresh) {
2643 balance_dirty_pages_ratelimited_nr(
2644 root->fs_info->btree_inode->i_mapping, 1);
2645 }
2646 return;
2647}
2648
2649void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
2650{
2651 /*
2652 * looks as though older kernels can get into trouble with
2653 * this code, they end up stuck in balance_dirty_pages forever
2654 */
2655 u64 num_dirty;
2656 unsigned long thresh = 32 * 1024 * 1024;
2657
2658 if (current->flags & PF_MEMALLOC)
2659 return;
2660
2602 num_dirty = root->fs_info->dirty_metadata_bytes; 2661 num_dirty = root->fs_info->dirty_metadata_bytes;
2603 2662
2604 if (num_dirty > thresh) { 2663 if (num_dirty > thresh) {
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 2d75f9e896f..a0b610a67aa 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -61,6 +61,7 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
61 struct btrfs_key *location); 61 struct btrfs_key *location);
62int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info); 62int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
63void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); 63void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
64void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
64int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); 65int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
65void btrfs_mark_buffer_dirty(struct extent_buffer *buf); 66void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
66int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); 67int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index b4ffad859ad..1b8dc33778f 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -32,7 +32,7 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
32 len = BTRFS_FID_SIZE_NON_CONNECTABLE; 32 len = BTRFS_FID_SIZE_NON_CONNECTABLE;
33 type = FILEID_BTRFS_WITHOUT_PARENT; 33 type = FILEID_BTRFS_WITHOUT_PARENT;
34 34
35 fid->objectid = inode->i_ino; 35 fid->objectid = btrfs_ino(inode);
36 fid->root_objectid = BTRFS_I(inode)->root->objectid; 36 fid->root_objectid = BTRFS_I(inode)->root->objectid;
37 fid->gen = inode->i_generation; 37 fid->gen = inode->i_generation;
38 38
@@ -178,13 +178,13 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
178 if (!path) 178 if (!path)
179 return ERR_PTR(-ENOMEM); 179 return ERR_PTR(-ENOMEM);
180 180
181 if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) { 181 if (btrfs_ino(dir) == BTRFS_FIRST_FREE_OBJECTID) {
182 key.objectid = root->root_key.objectid; 182 key.objectid = root->root_key.objectid;
183 key.type = BTRFS_ROOT_BACKREF_KEY; 183 key.type = BTRFS_ROOT_BACKREF_KEY;
184 key.offset = (u64)-1; 184 key.offset = (u64)-1;
185 root = root->fs_info->tree_root; 185 root = root->fs_info->tree_root;
186 } else { 186 } else {
187 key.objectid = dir->i_ino; 187 key.objectid = btrfs_ino(dir);
188 key.type = BTRFS_INODE_REF_KEY; 188 key.type = BTRFS_INODE_REF_KEY;
189 key.offset = (u64)-1; 189 key.offset = (u64)-1;
190 } 190 }
@@ -244,6 +244,7 @@ static int btrfs_get_name(struct dentry *parent, char *name,
244 struct btrfs_key key; 244 struct btrfs_key key;
245 int name_len; 245 int name_len;
246 int ret; 246 int ret;
247 u64 ino;
247 248
248 if (!dir || !inode) 249 if (!dir || !inode)
249 return -EINVAL; 250 return -EINVAL;
@@ -251,19 +252,21 @@ static int btrfs_get_name(struct dentry *parent, char *name,
251 if (!S_ISDIR(dir->i_mode)) 252 if (!S_ISDIR(dir->i_mode))
252 return -EINVAL; 253 return -EINVAL;
253 254
255 ino = btrfs_ino(inode);
256
254 path = btrfs_alloc_path(); 257 path = btrfs_alloc_path();
255 if (!path) 258 if (!path)
256 return -ENOMEM; 259 return -ENOMEM;
257 path->leave_spinning = 1; 260 path->leave_spinning = 1;
258 261
259 if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { 262 if (ino == BTRFS_FIRST_FREE_OBJECTID) {
260 key.objectid = BTRFS_I(inode)->root->root_key.objectid; 263 key.objectid = BTRFS_I(inode)->root->root_key.objectid;
261 key.type = BTRFS_ROOT_BACKREF_KEY; 264 key.type = BTRFS_ROOT_BACKREF_KEY;
262 key.offset = (u64)-1; 265 key.offset = (u64)-1;
263 root = root->fs_info->tree_root; 266 root = root->fs_info->tree_root;
264 } else { 267 } else {
265 key.objectid = inode->i_ino; 268 key.objectid = ino;
266 key.offset = dir->i_ino; 269 key.offset = btrfs_ino(dir);
267 key.type = BTRFS_INODE_REF_KEY; 270 key.type = BTRFS_INODE_REF_KEY;
268 } 271 }
269 272
@@ -272,7 +275,7 @@ static int btrfs_get_name(struct dentry *parent, char *name,
272 btrfs_free_path(path); 275 btrfs_free_path(path);
273 return ret; 276 return ret;
274 } else if (ret > 0) { 277 } else if (ret > 0) {
275 if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { 278 if (ino == BTRFS_FIRST_FREE_OBJECTID) {
276 path->slots[0]--; 279 path->slots[0]--;
277 } else { 280 } else {
278 btrfs_free_path(path); 281 btrfs_free_path(path);
@@ -281,11 +284,11 @@ static int btrfs_get_name(struct dentry *parent, char *name,
281 } 284 }
282 leaf = path->nodes[0]; 285 leaf = path->nodes[0];
283 286
284 if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { 287 if (ino == BTRFS_FIRST_FREE_OBJECTID) {
285 rref = btrfs_item_ptr(leaf, path->slots[0], 288 rref = btrfs_item_ptr(leaf, path->slots[0],
286 struct btrfs_root_ref); 289 struct btrfs_root_ref);
287 name_ptr = (unsigned long)(rref + 1); 290 name_ptr = (unsigned long)(rref + 1);
288 name_len = btrfs_root_ref_name_len(leaf, rref); 291 name_len = btrfs_root_ref_name_len(leaf, rref);
289 } else { 292 } else {
290 iref = btrfs_item_ptr(leaf, path->slots[0], 293 iref = btrfs_item_ptr(leaf, path->slots[0],
291 struct btrfs_inode_ref); 294 struct btrfs_inode_ref);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b457f195636..98ca149bdbc 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -105,6 +105,7 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
105 WARN_ON(cache->pinned > 0); 105 WARN_ON(cache->pinned > 0);
106 WARN_ON(cache->reserved > 0); 106 WARN_ON(cache->reserved > 0);
107 WARN_ON(cache->reserved_pinned > 0); 107 WARN_ON(cache->reserved_pinned > 0);
108 kfree(cache->free_space_ctl);
108 kfree(cache); 109 kfree(cache);
109 } 110 }
110} 111}
@@ -3036,7 +3037,8 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
3036 /* make sure bytes are sectorsize aligned */ 3037 /* make sure bytes are sectorsize aligned */
3037 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3038 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
3038 3039
3039 if (root == root->fs_info->tree_root) { 3040 if (root == root->fs_info->tree_root ||
3041 BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) {
3040 alloc_chunk = 0; 3042 alloc_chunk = 0;
3041 committed = 1; 3043 committed = 1;
3042 } 3044 }
@@ -3834,12 +3836,6 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
3834 WARN_ON(fs_info->chunk_block_rsv.reserved > 0); 3836 WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
3835} 3837}
3836 3838
3837static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
3838{
3839 return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
3840 3 * num_items;
3841}
3842
3843int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, 3839int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
3844 struct btrfs_root *root, 3840 struct btrfs_root *root,
3845 int num_items) 3841 int num_items)
@@ -3850,7 +3846,7 @@ int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
3850 if (num_items == 0 || root->fs_info->chunk_root == root) 3846 if (num_items == 0 || root->fs_info->chunk_root == root)
3851 return 0; 3847 return 0;
3852 3848
3853 num_bytes = calc_trans_metadata_size(root, num_items); 3849 num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
3854 ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, 3850 ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
3855 num_bytes); 3851 num_bytes);
3856 if (!ret) { 3852 if (!ret) {
@@ -3889,14 +3885,14 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
3889 * If all of the metadata space is used, we can commit 3885 * If all of the metadata space is used, we can commit
3890 * transaction and use space it freed. 3886 * transaction and use space it freed.
3891 */ 3887 */
3892 u64 num_bytes = calc_trans_metadata_size(root, 4); 3888 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4);
3893 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3889 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
3894} 3890}
3895 3891
3896void btrfs_orphan_release_metadata(struct inode *inode) 3892void btrfs_orphan_release_metadata(struct inode *inode)
3897{ 3893{
3898 struct btrfs_root *root = BTRFS_I(inode)->root; 3894 struct btrfs_root *root = BTRFS_I(inode)->root;
3899 u64 num_bytes = calc_trans_metadata_size(root, 4); 3895 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4);
3900 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); 3896 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
3901} 3897}
3902 3898
@@ -3910,7 +3906,7 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
3910 * two for root back/forward refs, two for directory entries 3906 * two for root back/forward refs, two for directory entries
3911 * and one for root of the snapshot. 3907 * and one for root of the snapshot.
3912 */ 3908 */
3913 u64 num_bytes = calc_trans_metadata_size(root, 5); 3909 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5);
3914 dst_rsv->space_info = src_rsv->space_info; 3910 dst_rsv->space_info = src_rsv->space_info;
3915 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3911 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
3916} 3912}
@@ -3939,7 +3935,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
3939 3935
3940 if (nr_extents > reserved_extents) { 3936 if (nr_extents > reserved_extents) {
3941 nr_extents -= reserved_extents; 3937 nr_extents -= reserved_extents;
3942 to_reserve = calc_trans_metadata_size(root, nr_extents); 3938 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
3943 } else { 3939 } else {
3944 nr_extents = 0; 3940 nr_extents = 0;
3945 to_reserve = 0; 3941 to_reserve = 0;
@@ -3993,7 +3989,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
3993 3989
3994 to_free = calc_csum_metadata_size(inode, num_bytes); 3990 to_free = calc_csum_metadata_size(inode, num_bytes);
3995 if (nr_extents > 0) 3991 if (nr_extents > 0)
3996 to_free += calc_trans_metadata_size(root, nr_extents); 3992 to_free += btrfs_calc_trans_metadata_size(root, nr_extents);
3997 3993
3998 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, 3994 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
3999 to_free); 3995 to_free);
@@ -4754,7 +4750,7 @@ wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
4754 return 0; 4750 return 0;
4755 4751
4756 wait_event(caching_ctl->wait, block_group_cache_done(cache) || 4752 wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
4757 (cache->free_space >= num_bytes)); 4753 (cache->free_space_ctl->free_space >= num_bytes));
4758 4754
4759 put_caching_control(caching_ctl); 4755 put_caching_control(caching_ctl);
4760 return 0; 4756 return 0;
@@ -6908,10 +6904,16 @@ int btrfs_read_block_groups(struct btrfs_root *root)
6908 ret = -ENOMEM; 6904 ret = -ENOMEM;
6909 goto error; 6905 goto error;
6910 } 6906 }
6907 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
6908 GFP_NOFS);
6909 if (!cache->free_space_ctl) {
6910 kfree(cache);
6911 ret = -ENOMEM;
6912 goto error;
6913 }
6911 6914
6912 atomic_set(&cache->count, 1); 6915 atomic_set(&cache->count, 1);
6913 spin_lock_init(&cache->lock); 6916 spin_lock_init(&cache->lock);
6914 spin_lock_init(&cache->tree_lock);
6915 cache->fs_info = info; 6917 cache->fs_info = info;
6916 INIT_LIST_HEAD(&cache->list); 6918 INIT_LIST_HEAD(&cache->list);
6917 INIT_LIST_HEAD(&cache->cluster_list); 6919 INIT_LIST_HEAD(&cache->cluster_list);
@@ -6919,14 +6921,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
6919 if (need_clear) 6921 if (need_clear)
6920 cache->disk_cache_state = BTRFS_DC_CLEAR; 6922 cache->disk_cache_state = BTRFS_DC_CLEAR;
6921 6923
6922 /*
6923 * we only want to have 32k of ram per block group for keeping
6924 * track of free space, and if we pass 1/2 of that we want to
6925 * start converting things over to using bitmaps
6926 */
6927 cache->extents_thresh = ((1024 * 32) / 2) /
6928 sizeof(struct btrfs_free_space);
6929
6930 read_extent_buffer(leaf, &cache->item, 6924 read_extent_buffer(leaf, &cache->item,
6931 btrfs_item_ptr_offset(leaf, path->slots[0]), 6925 btrfs_item_ptr_offset(leaf, path->slots[0]),
6932 sizeof(cache->item)); 6926 sizeof(cache->item));
@@ -6937,6 +6931,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
6937 cache->flags = btrfs_block_group_flags(&cache->item); 6931 cache->flags = btrfs_block_group_flags(&cache->item);
6938 cache->sectorsize = root->sectorsize; 6932 cache->sectorsize = root->sectorsize;
6939 6933
6934 btrfs_init_free_space_ctl(cache);
6935
6940 /* 6936 /*
6941 * We need to exclude the super stripes now so that the space 6937 * We need to exclude the super stripes now so that the space
6942 * info has super bytes accounted for, otherwise we'll think 6938 * info has super bytes accounted for, otherwise we'll think
@@ -7023,6 +7019,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
7023 cache = kzalloc(sizeof(*cache), GFP_NOFS); 7019 cache = kzalloc(sizeof(*cache), GFP_NOFS);
7024 if (!cache) 7020 if (!cache)
7025 return -ENOMEM; 7021 return -ENOMEM;
7022 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
7023 GFP_NOFS);
7024 if (!cache->free_space_ctl) {
7025 kfree(cache);
7026 return -ENOMEM;
7027 }
7026 7028
7027 cache->key.objectid = chunk_offset; 7029 cache->key.objectid = chunk_offset;
7028 cache->key.offset = size; 7030 cache->key.offset = size;
@@ -7030,19 +7032,13 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
7030 cache->sectorsize = root->sectorsize; 7032 cache->sectorsize = root->sectorsize;
7031 cache->fs_info = root->fs_info; 7033 cache->fs_info = root->fs_info;
7032 7034
7033 /*
7034 * we only want to have 32k of ram per block group for keeping track
7035 * of free space, and if we pass 1/2 of that we want to start
7036 * converting things over to using bitmaps
7037 */
7038 cache->extents_thresh = ((1024 * 32) / 2) /
7039 sizeof(struct btrfs_free_space);
7040 atomic_set(&cache->count, 1); 7035 atomic_set(&cache->count, 1);
7041 spin_lock_init(&cache->lock); 7036 spin_lock_init(&cache->lock);
7042 spin_lock_init(&cache->tree_lock);
7043 INIT_LIST_HEAD(&cache->list); 7037 INIT_LIST_HEAD(&cache->list);
7044 INIT_LIST_HEAD(&cache->cluster_list); 7038 INIT_LIST_HEAD(&cache->cluster_list);
7045 7039
7040 btrfs_init_free_space_ctl(cache);
7041
7046 btrfs_set_block_group_used(&cache->item, bytes_used); 7042 btrfs_set_block_group_used(&cache->item, bytes_used);
7047 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); 7043 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
7048 cache->flags = type; 7044 cache->flags = type;
@@ -7209,23 +7205,38 @@ out:
7209int btrfs_init_space_info(struct btrfs_fs_info *fs_info) 7205int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
7210{ 7206{
7211 struct btrfs_space_info *space_info; 7207 struct btrfs_space_info *space_info;
7208 struct btrfs_super_block *disk_super;
7209 u64 features;
7210 u64 flags;
7211 int mixed = 0;
7212 int ret; 7212 int ret;
7213 7213
7214 ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM, 0, 0, 7214 disk_super = &fs_info->super_copy;
7215 &space_info); 7215 if (!btrfs_super_root(disk_super))
7216 if (ret) 7216 return 1;
7217 return ret;
7218 7217
7219 ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA, 0, 0, 7218 features = btrfs_super_incompat_flags(disk_super);
7220 &space_info); 7219 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
7221 if (ret) 7220 mixed = 1;
7222 return ret;
7223 7221
7224 ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA, 0, 0, 7222 flags = BTRFS_BLOCK_GROUP_SYSTEM;
7225 &space_info); 7223 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
7226 if (ret) 7224 if (ret)
7227 return ret; 7225 goto out;
7228 7226
7227 if (mixed) {
7228 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
7229 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
7230 } else {
7231 flags = BTRFS_BLOCK_GROUP_METADATA;
7232 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
7233 if (ret)
7234 goto out;
7235
7236 flags = BTRFS_BLOCK_GROUP_DATA;
7237 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
7238 }
7239out:
7229 return ret; 7240 return ret;
7230} 7241}
7231 7242
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 91208296ff2..64c8b361b53 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2810,7 +2810,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2810 * because there might be preallocation past i_size 2810 * because there might be preallocation past i_size
2811 */ 2811 */
2812 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, 2812 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
2813 path, inode->i_ino, -1, 0); 2813 path, btrfs_ino(inode), -1, 0);
2814 if (ret < 0) { 2814 if (ret < 0) {
2815 btrfs_free_path(path); 2815 btrfs_free_path(path);
2816 return ret; 2816 return ret;
@@ -2823,7 +2823,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2823 found_type = btrfs_key_type(&found_key); 2823 found_type = btrfs_key_type(&found_key);
2824 2824
2825 /* No extents, but there might be delalloc bits */ 2825 /* No extents, but there might be delalloc bits */
2826 if (found_key.objectid != inode->i_ino || 2826 if (found_key.objectid != btrfs_ino(inode) ||
2827 found_type != BTRFS_EXTENT_DATA_KEY) { 2827 found_type != BTRFS_EXTENT_DATA_KEY) {
2828 /* have to trust i_size as the end */ 2828 /* have to trust i_size as the end */
2829 last = (u64)-1; 2829 last = (u64)-1;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index f47e43d855a..29e014984f6 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -208,8 +208,9 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
208 EXTENT_NODATASUM, GFP_NOFS); 208 EXTENT_NODATASUM, GFP_NOFS);
209 } else { 209 } else {
210 printk(KERN_INFO "btrfs no csum found " 210 printk(KERN_INFO "btrfs no csum found "
211 "for inode %lu start %llu\n", 211 "for inode %llu start %llu\n",
212 inode->i_ino, 212 (unsigned long long)
213 btrfs_ino(inode),
213 (unsigned long long)offset); 214 (unsigned long long)offset);
214 } 215 }
215 item = NULL; 216 item = NULL;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 566bdf298ea..58ddc444215 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -298,6 +298,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
298 struct btrfs_path *path; 298 struct btrfs_path *path;
299 struct btrfs_key key; 299 struct btrfs_key key;
300 struct btrfs_key new_key; 300 struct btrfs_key new_key;
301 u64 ino = btrfs_ino(inode);
301 u64 search_start = start; 302 u64 search_start = start;
302 u64 disk_bytenr = 0; 303 u64 disk_bytenr = 0;
303 u64 num_bytes = 0; 304 u64 num_bytes = 0;
@@ -318,14 +319,14 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
318 319
319 while (1) { 320 while (1) {
320 recow = 0; 321 recow = 0;
321 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, 322 ret = btrfs_lookup_file_extent(trans, root, path, ino,
322 search_start, -1); 323 search_start, -1);
323 if (ret < 0) 324 if (ret < 0)
324 break; 325 break;
325 if (ret > 0 && path->slots[0] > 0 && search_start == start) { 326 if (ret > 0 && path->slots[0] > 0 && search_start == start) {
326 leaf = path->nodes[0]; 327 leaf = path->nodes[0];
327 btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1); 328 btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
328 if (key.objectid == inode->i_ino && 329 if (key.objectid == ino &&
329 key.type == BTRFS_EXTENT_DATA_KEY) 330 key.type == BTRFS_EXTENT_DATA_KEY)
330 path->slots[0]--; 331 path->slots[0]--;
331 } 332 }
@@ -346,7 +347,7 @@ next_slot:
346 } 347 }
347 348
348 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 349 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
349 if (key.objectid > inode->i_ino || 350 if (key.objectid > ino ||
350 key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) 351 key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
351 break; 352 break;
352 353
@@ -592,6 +593,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
592 int del_slot = 0; 593 int del_slot = 0;
593 int recow; 594 int recow;
594 int ret; 595 int ret;
596 u64 ino = btrfs_ino(inode);
595 597
596 btrfs_drop_extent_cache(inode, start, end - 1, 0); 598 btrfs_drop_extent_cache(inode, start, end - 1, 0);
597 599
@@ -600,7 +602,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
600again: 602again:
601 recow = 0; 603 recow = 0;
602 split = start; 604 split = start;
603 key.objectid = inode->i_ino; 605 key.objectid = ino;
604 key.type = BTRFS_EXTENT_DATA_KEY; 606 key.type = BTRFS_EXTENT_DATA_KEY;
605 key.offset = split; 607 key.offset = split;
606 608
@@ -612,8 +614,7 @@ again:
612 614
613 leaf = path->nodes[0]; 615 leaf = path->nodes[0];
614 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 616 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
615 BUG_ON(key.objectid != inode->i_ino || 617 BUG_ON(key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY);
616 key.type != BTRFS_EXTENT_DATA_KEY);
617 fi = btrfs_item_ptr(leaf, path->slots[0], 618 fi = btrfs_item_ptr(leaf, path->slots[0],
618 struct btrfs_file_extent_item); 619 struct btrfs_file_extent_item);
619 BUG_ON(btrfs_file_extent_type(leaf, fi) != 620 BUG_ON(btrfs_file_extent_type(leaf, fi) !=
@@ -630,7 +631,7 @@ again:
630 other_start = 0; 631 other_start = 0;
631 other_end = start; 632 other_end = start;
632 if (extent_mergeable(leaf, path->slots[0] - 1, 633 if (extent_mergeable(leaf, path->slots[0] - 1,
633 inode->i_ino, bytenr, orig_offset, 634 ino, bytenr, orig_offset,
634 &other_start, &other_end)) { 635 &other_start, &other_end)) {
635 new_key.offset = end; 636 new_key.offset = end;
636 btrfs_set_item_key_safe(trans, root, path, &new_key); 637 btrfs_set_item_key_safe(trans, root, path, &new_key);
@@ -653,7 +654,7 @@ again:
653 other_start = end; 654 other_start = end;
654 other_end = 0; 655 other_end = 0;
655 if (extent_mergeable(leaf, path->slots[0] + 1, 656 if (extent_mergeable(leaf, path->slots[0] + 1,
656 inode->i_ino, bytenr, orig_offset, 657 ino, bytenr, orig_offset,
657 &other_start, &other_end)) { 658 &other_start, &other_end)) {
658 fi = btrfs_item_ptr(leaf, path->slots[0], 659 fi = btrfs_item_ptr(leaf, path->slots[0],
659 struct btrfs_file_extent_item); 660 struct btrfs_file_extent_item);
@@ -702,7 +703,7 @@ again:
702 703
703 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, 704 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
704 root->root_key.objectid, 705 root->root_key.objectid,
705 inode->i_ino, orig_offset); 706 ino, orig_offset);
706 BUG_ON(ret); 707 BUG_ON(ret);
707 708
708 if (split == start) { 709 if (split == start) {
@@ -718,7 +719,7 @@ again:
718 other_start = end; 719 other_start = end;
719 other_end = 0; 720 other_end = 0;
720 if (extent_mergeable(leaf, path->slots[0] + 1, 721 if (extent_mergeable(leaf, path->slots[0] + 1,
721 inode->i_ino, bytenr, orig_offset, 722 ino, bytenr, orig_offset,
722 &other_start, &other_end)) { 723 &other_start, &other_end)) {
723 if (recow) { 724 if (recow) {
724 btrfs_release_path(path); 725 btrfs_release_path(path);
@@ -729,13 +730,13 @@ again:
729 del_nr++; 730 del_nr++;
730 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 731 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
731 0, root->root_key.objectid, 732 0, root->root_key.objectid,
732 inode->i_ino, orig_offset); 733 ino, orig_offset);
733 BUG_ON(ret); 734 BUG_ON(ret);
734 } 735 }
735 other_start = 0; 736 other_start = 0;
736 other_end = start; 737 other_end = start;
737 if (extent_mergeable(leaf, path->slots[0] - 1, 738 if (extent_mergeable(leaf, path->slots[0] - 1,
738 inode->i_ino, bytenr, orig_offset, 739 ino, bytenr, orig_offset,
739 &other_start, &other_end)) { 740 &other_start, &other_end)) {
740 if (recow) { 741 if (recow) {
741 btrfs_release_path(path); 742 btrfs_release_path(path);
@@ -746,7 +747,7 @@ again:
746 del_nr++; 747 del_nr++;
747 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 748 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
748 0, root->root_key.objectid, 749 0, root->root_key.objectid,
749 inode->i_ino, orig_offset); 750 ino, orig_offset);
750 BUG_ON(ret); 751 BUG_ON(ret);
751 } 752 }
752 if (del_nr == 0) { 753 if (del_nr == 0) {
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 0290b0c7b00..70d45795d75 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -25,18 +25,17 @@
25#include "transaction.h" 25#include "transaction.h"
26#include "disk-io.h" 26#include "disk-io.h"
27#include "extent_io.h" 27#include "extent_io.h"
28#include "inode-map.h"
28 29
29#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 30#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
30#define MAX_CACHE_BYTES_PER_GIG (32 * 1024) 31#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
31 32
32static void recalculate_thresholds(struct btrfs_block_group_cache 33static int link_free_space(struct btrfs_free_space_ctl *ctl,
33 *block_group);
34static int link_free_space(struct btrfs_block_group_cache *block_group,
35 struct btrfs_free_space *info); 34 struct btrfs_free_space *info);
36 35
37struct inode *lookup_free_space_inode(struct btrfs_root *root, 36static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
38 struct btrfs_block_group_cache 37 struct btrfs_path *path,
39 *block_group, struct btrfs_path *path) 38 u64 offset)
40{ 39{
41 struct btrfs_key key; 40 struct btrfs_key key;
42 struct btrfs_key location; 41 struct btrfs_key location;
@@ -46,15 +45,8 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
46 struct inode *inode = NULL; 45 struct inode *inode = NULL;
47 int ret; 46 int ret;
48 47
49 spin_lock(&block_group->lock);
50 if (block_group->inode)
51 inode = igrab(block_group->inode);
52 spin_unlock(&block_group->lock);
53 if (inode)
54 return inode;
55
56 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 48 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
57 key.offset = block_group->key.objectid; 49 key.offset = offset;
58 key.type = 0; 50 key.type = 0;
59 51
60 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 52 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -84,6 +76,27 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
84 76
85 inode->i_mapping->flags &= ~__GFP_FS; 77 inode->i_mapping->flags &= ~__GFP_FS;
86 78
79 return inode;
80}
81
82struct inode *lookup_free_space_inode(struct btrfs_root *root,
83 struct btrfs_block_group_cache
84 *block_group, struct btrfs_path *path)
85{
86 struct inode *inode = NULL;
87
88 spin_lock(&block_group->lock);
89 if (block_group->inode)
90 inode = igrab(block_group->inode);
91 spin_unlock(&block_group->lock);
92 if (inode)
93 return inode;
94
95 inode = __lookup_free_space_inode(root, path,
96 block_group->key.objectid);
97 if (IS_ERR(inode))
98 return inode;
99
87 spin_lock(&block_group->lock); 100 spin_lock(&block_group->lock);
88 if (!root->fs_info->closing) { 101 if (!root->fs_info->closing) {
89 block_group->inode = igrab(inode); 102 block_group->inode = igrab(inode);
@@ -94,24 +107,18 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
94 return inode; 107 return inode;
95} 108}
96 109
97int create_free_space_inode(struct btrfs_root *root, 110int __create_free_space_inode(struct btrfs_root *root,
98 struct btrfs_trans_handle *trans, 111 struct btrfs_trans_handle *trans,
99 struct btrfs_block_group_cache *block_group, 112 struct btrfs_path *path, u64 ino, u64 offset)
100 struct btrfs_path *path)
101{ 113{
102 struct btrfs_key key; 114 struct btrfs_key key;
103 struct btrfs_disk_key disk_key; 115 struct btrfs_disk_key disk_key;
104 struct btrfs_free_space_header *header; 116 struct btrfs_free_space_header *header;
105 struct btrfs_inode_item *inode_item; 117 struct btrfs_inode_item *inode_item;
106 struct extent_buffer *leaf; 118 struct extent_buffer *leaf;
107 u64 objectid;
108 int ret; 119 int ret;
109 120
110 ret = btrfs_find_free_objectid(trans, root, 0, &objectid); 121 ret = btrfs_insert_empty_inode(trans, root, path, ino);
111 if (ret < 0)
112 return ret;
113
114 ret = btrfs_insert_empty_inode(trans, root, path, objectid);
115 if (ret) 122 if (ret)
116 return ret; 123 return ret;
117 124
@@ -131,13 +138,12 @@ int create_free_space_inode(struct btrfs_root *root,
131 BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM); 138 BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
132 btrfs_set_inode_nlink(leaf, inode_item, 1); 139 btrfs_set_inode_nlink(leaf, inode_item, 1);
133 btrfs_set_inode_transid(leaf, inode_item, trans->transid); 140 btrfs_set_inode_transid(leaf, inode_item, trans->transid);
134 btrfs_set_inode_block_group(leaf, inode_item, 141 btrfs_set_inode_block_group(leaf, inode_item, offset);
135 block_group->key.objectid);
136 btrfs_mark_buffer_dirty(leaf); 142 btrfs_mark_buffer_dirty(leaf);
137 btrfs_release_path(path); 143 btrfs_release_path(path);
138 144
139 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 145 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
140 key.offset = block_group->key.objectid; 146 key.offset = offset;
141 key.type = 0; 147 key.type = 0;
142 148
143 ret = btrfs_insert_empty_item(trans, root, path, &key, 149 ret = btrfs_insert_empty_item(trans, root, path, &key,
@@ -157,6 +163,22 @@ int create_free_space_inode(struct btrfs_root *root,
157 return 0; 163 return 0;
158} 164}
159 165
166int create_free_space_inode(struct btrfs_root *root,
167 struct btrfs_trans_handle *trans,
168 struct btrfs_block_group_cache *block_group,
169 struct btrfs_path *path)
170{
171 int ret;
172 u64 ino;
173
174 ret = btrfs_find_free_objectid(root, &ino);
175 if (ret < 0)
176 return ret;
177
178 return __create_free_space_inode(root, trans, path, ino,
179 block_group->key.objectid);
180}
181
160int btrfs_truncate_free_space_cache(struct btrfs_root *root, 182int btrfs_truncate_free_space_cache(struct btrfs_root *root,
161 struct btrfs_trans_handle *trans, 183 struct btrfs_trans_handle *trans,
162 struct btrfs_path *path, 184 struct btrfs_path *path,
@@ -187,7 +209,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
187 return ret; 209 return ret;
188 } 210 }
189 211
190 return btrfs_update_inode(trans, root, inode); 212 ret = btrfs_update_inode(trans, root, inode);
213 return ret;
191} 214}
192 215
193static int readahead_cache(struct inode *inode) 216static int readahead_cache(struct inode *inode)
@@ -209,15 +232,13 @@ static int readahead_cache(struct inode *inode)
209 return 0; 232 return 0;
210} 233}
211 234
212int load_free_space_cache(struct btrfs_fs_info *fs_info, 235int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
213 struct btrfs_block_group_cache *block_group) 236 struct btrfs_free_space_ctl *ctl,
237 struct btrfs_path *path, u64 offset)
214{ 238{
215 struct btrfs_root *root = fs_info->tree_root;
216 struct inode *inode;
217 struct btrfs_free_space_header *header; 239 struct btrfs_free_space_header *header;
218 struct extent_buffer *leaf; 240 struct extent_buffer *leaf;
219 struct page *page; 241 struct page *page;
220 struct btrfs_path *path;
221 u32 *checksums = NULL, *crc; 242 u32 *checksums = NULL, *crc;
222 char *disk_crcs = NULL; 243 char *disk_crcs = NULL;
223 struct btrfs_key key; 244 struct btrfs_key key;
@@ -225,76 +246,47 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
225 u64 num_entries; 246 u64 num_entries;
226 u64 num_bitmaps; 247 u64 num_bitmaps;
227 u64 generation; 248 u64 generation;
228 u64 used = btrfs_block_group_used(&block_group->item);
229 u32 cur_crc = ~(u32)0; 249 u32 cur_crc = ~(u32)0;
230 pgoff_t index = 0; 250 pgoff_t index = 0;
231 unsigned long first_page_offset; 251 unsigned long first_page_offset;
232 int num_checksums; 252 int num_checksums;
233 int ret = 0; 253 int ret = 0, ret2;
234
235 /*
236 * If we're unmounting then just return, since this does a search on the
237 * normal root and not the commit root and we could deadlock.
238 */
239 smp_mb();
240 if (fs_info->closing)
241 return 0;
242
243 /*
244 * If this block group has been marked to be cleared for one reason or
245 * another then we can't trust the on disk cache, so just return.
246 */
247 spin_lock(&block_group->lock);
248 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
249 spin_unlock(&block_group->lock);
250 return 0;
251 }
252 spin_unlock(&block_group->lock);
253 254
254 INIT_LIST_HEAD(&bitmaps); 255 INIT_LIST_HEAD(&bitmaps);
255 256
256 path = btrfs_alloc_path();
257 if (!path)
258 return 0;
259
260 inode = lookup_free_space_inode(root, block_group, path);
261 if (IS_ERR(inode)) {
262 btrfs_free_path(path);
263 return 0;
264 }
265
266 /* Nothing in the space cache, goodbye */ 257 /* Nothing in the space cache, goodbye */
267 if (!i_size_read(inode)) { 258 if (!i_size_read(inode))
268 btrfs_free_path(path);
269 goto out; 259 goto out;
270 }
271 260
272 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 261 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
273 key.offset = block_group->key.objectid; 262 key.offset = offset;
274 key.type = 0; 263 key.type = 0;
275 264
276 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 265 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
277 if (ret) { 266 if (ret < 0)
278 btrfs_free_path(path); 267 goto out;
268 else if (ret > 0) {
269 btrfs_release_path(path);
270 ret = 0;
279 goto out; 271 goto out;
280 } 272 }
281 273
274 ret = -1;
275
282 leaf = path->nodes[0]; 276 leaf = path->nodes[0];
283 header = btrfs_item_ptr(leaf, path->slots[0], 277 header = btrfs_item_ptr(leaf, path->slots[0],
284 struct btrfs_free_space_header); 278 struct btrfs_free_space_header);
285 num_entries = btrfs_free_space_entries(leaf, header); 279 num_entries = btrfs_free_space_entries(leaf, header);
286 num_bitmaps = btrfs_free_space_bitmaps(leaf, header); 280 num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
287 generation = btrfs_free_space_generation(leaf, header); 281 generation = btrfs_free_space_generation(leaf, header);
288 btrfs_free_path(path); 282 btrfs_release_path(path);
289 283
290 if (BTRFS_I(inode)->generation != generation) { 284 if (BTRFS_I(inode)->generation != generation) {
291 printk(KERN_ERR "btrfs: free space inode generation (%llu) did" 285 printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
292 " not match free space cache generation (%llu) for " 286 " not match free space cache generation (%llu)\n",
293 "block group %llu\n",
294 (unsigned long long)BTRFS_I(inode)->generation, 287 (unsigned long long)BTRFS_I(inode)->generation,
295 (unsigned long long)generation, 288 (unsigned long long)generation);
296 (unsigned long long)block_group->key.objectid); 289 goto out;
297 goto free_cache;
298 } 290 }
299 291
300 if (!num_entries) 292 if (!num_entries)
@@ -311,10 +303,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
311 goto out; 303 goto out;
312 304
313 ret = readahead_cache(inode); 305 ret = readahead_cache(inode);
314 if (ret) { 306 if (ret)
315 ret = 0;
316 goto out; 307 goto out;
317 }
318 308
319 while (1) { 309 while (1) {
320 struct btrfs_free_space_entry *entry; 310 struct btrfs_free_space_entry *entry;
@@ -333,10 +323,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
333 } 323 }
334 324
335 page = grab_cache_page(inode->i_mapping, index); 325 page = grab_cache_page(inode->i_mapping, index);
336 if (!page) { 326 if (!page)
337 ret = 0;
338 goto free_cache; 327 goto free_cache;
339 }
340 328
341 if (!PageUptodate(page)) { 329 if (!PageUptodate(page)) {
342 btrfs_readpage(NULL, page); 330 btrfs_readpage(NULL, page);
@@ -345,9 +333,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
345 unlock_page(page); 333 unlock_page(page);
346 page_cache_release(page); 334 page_cache_release(page);
347 printk(KERN_ERR "btrfs: error reading free " 335 printk(KERN_ERR "btrfs: error reading free "
348 "space cache: %llu\n", 336 "space cache\n");
349 (unsigned long long)
350 block_group->key.objectid);
351 goto free_cache; 337 goto free_cache;
352 } 338 }
353 } 339 }
@@ -360,13 +346,10 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
360 gen = addr + (sizeof(u32) * num_checksums); 346 gen = addr + (sizeof(u32) * num_checksums);
361 if (*gen != BTRFS_I(inode)->generation) { 347 if (*gen != BTRFS_I(inode)->generation) {
362 printk(KERN_ERR "btrfs: space cache generation" 348 printk(KERN_ERR "btrfs: space cache generation"
363 " (%llu) does not match inode (%llu) " 349 " (%llu) does not match inode (%llu)\n",
364 "for block group %llu\n",
365 (unsigned long long)*gen, 350 (unsigned long long)*gen,
366 (unsigned long long) 351 (unsigned long long)
367 BTRFS_I(inode)->generation, 352 BTRFS_I(inode)->generation);
368 (unsigned long long)
369 block_group->key.objectid);
370 kunmap(page); 353 kunmap(page);
371 unlock_page(page); 354 unlock_page(page);
372 page_cache_release(page); 355 page_cache_release(page);
@@ -382,9 +365,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
382 PAGE_CACHE_SIZE - start_offset); 365 PAGE_CACHE_SIZE - start_offset);
383 btrfs_csum_final(cur_crc, (char *)&cur_crc); 366 btrfs_csum_final(cur_crc, (char *)&cur_crc);
384 if (cur_crc != *crc) { 367 if (cur_crc != *crc) {
385 printk(KERN_ERR "btrfs: crc mismatch for page %lu in " 368 printk(KERN_ERR "btrfs: crc mismatch for page %lu\n",
386 "block group %llu\n", index, 369 index);
387 (unsigned long long)block_group->key.objectid);
388 kunmap(page); 370 kunmap(page);
389 unlock_page(page); 371 unlock_page(page);
390 page_cache_release(page); 372 page_cache_release(page);
@@ -417,9 +399,9 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
417 } 399 }
418 400
419 if (entry->type == BTRFS_FREE_SPACE_EXTENT) { 401 if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
420 spin_lock(&block_group->tree_lock); 402 spin_lock(&ctl->tree_lock);
421 ret = link_free_space(block_group, e); 403 ret = link_free_space(ctl, e);
422 spin_unlock(&block_group->tree_lock); 404 spin_unlock(&ctl->tree_lock);
423 BUG_ON(ret); 405 BUG_ON(ret);
424 } else { 406 } else {
425 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 407 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
@@ -431,11 +413,11 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
431 page_cache_release(page); 413 page_cache_release(page);
432 goto free_cache; 414 goto free_cache;
433 } 415 }
434 spin_lock(&block_group->tree_lock); 416 spin_lock(&ctl->tree_lock);
435 ret = link_free_space(block_group, e); 417 ret2 = link_free_space(ctl, e);
436 block_group->total_bitmaps++; 418 ctl->total_bitmaps++;
437 recalculate_thresholds(block_group); 419 ctl->op->recalc_thresholds(ctl);
438 spin_unlock(&block_group->tree_lock); 420 spin_unlock(&ctl->tree_lock);
439 list_add_tail(&e->list, &bitmaps); 421 list_add_tail(&e->list, &bitmaps);
440 } 422 }
441 423
@@ -471,41 +453,97 @@ next:
471 index++; 453 index++;
472 } 454 }
473 455
474 spin_lock(&block_group->tree_lock);
475 if (block_group->free_space != (block_group->key.offset - used -
476 block_group->bytes_super)) {
477 spin_unlock(&block_group->tree_lock);
478 printk(KERN_ERR "block group %llu has an wrong amount of free "
479 "space\n", block_group->key.objectid);
480 ret = 0;
481 goto free_cache;
482 }
483 spin_unlock(&block_group->tree_lock);
484
485 ret = 1; 456 ret = 1;
486out: 457out:
487 kfree(checksums); 458 kfree(checksums);
488 kfree(disk_crcs); 459 kfree(disk_crcs);
489 iput(inode);
490 return ret; 460 return ret;
491
492free_cache: 461free_cache:
493 /* This cache is bogus, make sure it gets cleared */ 462 __btrfs_remove_free_space_cache(ctl);
463 goto out;
464}
465
466int load_free_space_cache(struct btrfs_fs_info *fs_info,
467 struct btrfs_block_group_cache *block_group)
468{
469 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
470 struct btrfs_root *root = fs_info->tree_root;
471 struct inode *inode;
472 struct btrfs_path *path;
473 int ret;
474 bool matched;
475 u64 used = btrfs_block_group_used(&block_group->item);
476
477 /*
478 * If we're unmounting then just return, since this does a search on the
479 * normal root and not the commit root and we could deadlock.
480 */
481 smp_mb();
482 if (fs_info->closing)
483 return 0;
484
485 /*
486 * If this block group has been marked to be cleared for one reason or
487 * another then we can't trust the on disk cache, so just return.
488 */
494 spin_lock(&block_group->lock); 489 spin_lock(&block_group->lock);
495 block_group->disk_cache_state = BTRFS_DC_CLEAR; 490 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
491 spin_unlock(&block_group->lock);
492 return 0;
493 }
496 spin_unlock(&block_group->lock); 494 spin_unlock(&block_group->lock);
497 btrfs_remove_free_space_cache(block_group); 495
498 goto out; 496 path = btrfs_alloc_path();
497 if (!path)
498 return 0;
499
500 inode = lookup_free_space_inode(root, block_group, path);
501 if (IS_ERR(inode)) {
502 btrfs_free_path(path);
503 return 0;
504 }
505
506 ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
507 path, block_group->key.objectid);
508 btrfs_free_path(path);
509 if (ret <= 0)
510 goto out;
511
512 spin_lock(&ctl->tree_lock);
513 matched = (ctl->free_space == (block_group->key.offset - used -
514 block_group->bytes_super));
515 spin_unlock(&ctl->tree_lock);
516
517 if (!matched) {
518 __btrfs_remove_free_space_cache(ctl);
519 printk(KERN_ERR "block group %llu has an wrong amount of free "
520 "space\n", block_group->key.objectid);
521 ret = -1;
522 }
523out:
524 if (ret < 0) {
525 /* This cache is bogus, make sure it gets cleared */
526 spin_lock(&block_group->lock);
527 block_group->disk_cache_state = BTRFS_DC_CLEAR;
528 spin_unlock(&block_group->lock);
529 ret = 0;
530
531 printk(KERN_ERR "btrfs: failed to load free space cache "
532 "for block group %llu\n", block_group->key.objectid);
533 }
534
535 iput(inode);
536 return ret;
499} 537}
500 538
501int btrfs_write_out_cache(struct btrfs_root *root, 539int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
502 struct btrfs_trans_handle *trans, 540 struct btrfs_free_space_ctl *ctl,
503 struct btrfs_block_group_cache *block_group, 541 struct btrfs_block_group_cache *block_group,
504 struct btrfs_path *path) 542 struct btrfs_trans_handle *trans,
543 struct btrfs_path *path, u64 offset)
505{ 544{
506 struct btrfs_free_space_header *header; 545 struct btrfs_free_space_header *header;
507 struct extent_buffer *leaf; 546 struct extent_buffer *leaf;
508 struct inode *inode;
509 struct rb_node *node; 547 struct rb_node *node;
510 struct list_head *pos, *n; 548 struct list_head *pos, *n;
511 struct page **pages; 549 struct page **pages;
@@ -522,35 +560,18 @@ int btrfs_write_out_cache(struct btrfs_root *root,
522 int index = 0, num_pages = 0; 560 int index = 0, num_pages = 0;
523 int entries = 0; 561 int entries = 0;
524 int bitmaps = 0; 562 int bitmaps = 0;
525 int ret = 0; 563 int ret = -1;
526 bool next_page = false; 564 bool next_page = false;
527 bool out_of_space = false; 565 bool out_of_space = false;
528 566
529 root = root->fs_info->tree_root;
530
531 INIT_LIST_HEAD(&bitmap_list); 567 INIT_LIST_HEAD(&bitmap_list);
532 568
533 spin_lock(&block_group->lock); 569 node = rb_first(&ctl->free_space_offset);
534 if (block_group->disk_cache_state < BTRFS_DC_SETUP) { 570 if (!node)
535 spin_unlock(&block_group->lock);
536 return 0;
537 }
538 spin_unlock(&block_group->lock);
539
540 inode = lookup_free_space_inode(root, block_group, path);
541 if (IS_ERR(inode))
542 return 0;
543
544 if (!i_size_read(inode)) {
545 iput(inode);
546 return 0; 571 return 0;
547 }
548 572
549 node = rb_first(&block_group->free_space_offset); 573 if (!i_size_read(inode))
550 if (!node) { 574 return -1;
551 iput(inode);
552 return 0;
553 }
554 575
555 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> 576 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
556 PAGE_CACHE_SHIFT; 577 PAGE_CACHE_SHIFT;
@@ -560,16 +581,13 @@ int btrfs_write_out_cache(struct btrfs_root *root,
560 581
561 /* We need a checksum per page. */ 582 /* We need a checksum per page. */
562 crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); 583 crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
563 if (!crc) { 584 if (!crc)
564 iput(inode); 585 return -1;
565 return 0;
566 }
567 586
568 pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); 587 pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
569 if (!pages) { 588 if (!pages) {
570 kfree(crc); 589 kfree(crc);
571 iput(inode); 590 return -1;
572 return 0;
573 } 591 }
574 592
575 /* Since the first page has all of our checksums and our generation we 593 /* Since the first page has all of our checksums and our generation we
@@ -579,7 +597,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
579 first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64); 597 first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
580 598
581 /* Get the cluster for this block_group if it exists */ 599 /* Get the cluster for this block_group if it exists */
582 if (!list_empty(&block_group->cluster_list)) 600 if (block_group && !list_empty(&block_group->cluster_list))
583 cluster = list_entry(block_group->cluster_list.next, 601 cluster = list_entry(block_group->cluster_list.next,
584 struct btrfs_free_cluster, 602 struct btrfs_free_cluster,
585 block_group_list); 603 block_group_list);
@@ -621,7 +639,8 @@ int btrfs_write_out_cache(struct btrfs_root *root,
621 * When searching for pinned extents, we need to start at our start 639 * When searching for pinned extents, we need to start at our start
622 * offset. 640 * offset.
623 */ 641 */
624 start = block_group->key.objectid; 642 if (block_group)
643 start = block_group->key.objectid;
625 644
626 /* Write out the extent entries */ 645 /* Write out the extent entries */
627 do { 646 do {
@@ -679,8 +698,9 @@ int btrfs_write_out_cache(struct btrfs_root *root,
679 * We want to add any pinned extents to our free space cache 698 * We want to add any pinned extents to our free space cache
680 * so we don't leak the space 699 * so we don't leak the space
681 */ 700 */
682 while (!next_page && (start < block_group->key.objectid + 701 while (block_group && !next_page &&
683 block_group->key.offset)) { 702 (start < block_group->key.objectid +
703 block_group->key.offset)) {
684 ret = find_first_extent_bit(unpin, start, &start, &end, 704 ret = find_first_extent_bit(unpin, start, &start, &end,
685 EXTENT_DIRTY); 705 EXTENT_DIRTY);
686 if (ret) { 706 if (ret) {
@@ -798,12 +818,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
798 filemap_write_and_wait(inode->i_mapping); 818 filemap_write_and_wait(inode->i_mapping);
799 819
800 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 820 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
801 key.offset = block_group->key.objectid; 821 key.offset = offset;
802 key.type = 0; 822 key.type = 0;
803 823
804 ret = btrfs_search_slot(trans, root, &key, path, 1, 1); 824 ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
805 if (ret < 0) { 825 if (ret < 0) {
806 ret = 0; 826 ret = -1;
807 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, 827 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
808 EXTENT_DIRTY | EXTENT_DELALLOC | 828 EXTENT_DIRTY | EXTENT_DELALLOC |
809 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); 829 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
@@ -816,8 +836,8 @@ int btrfs_write_out_cache(struct btrfs_root *root,
816 path->slots[0]--; 836 path->slots[0]--;
817 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 837 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
818 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 838 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
819 found_key.offset != block_group->key.objectid) { 839 found_key.offset != offset) {
820 ret = 0; 840 ret = -1;
821 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, 841 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
822 EXTENT_DIRTY | EXTENT_DELALLOC | 842 EXTENT_DIRTY | EXTENT_DELALLOC |
823 EXTENT_DO_ACCOUNTING, 0, 0, NULL, 843 EXTENT_DO_ACCOUNTING, 0, 0, NULL,
@@ -837,44 +857,78 @@ int btrfs_write_out_cache(struct btrfs_root *root,
837 ret = 1; 857 ret = 1;
838 858
839out_free: 859out_free:
840 if (ret == 0) { 860 if (ret != 1) {
841 invalidate_inode_pages2_range(inode->i_mapping, 0, index); 861 invalidate_inode_pages2_range(inode->i_mapping, 0, index);
842 spin_lock(&block_group->lock);
843 block_group->disk_cache_state = BTRFS_DC_ERROR;
844 spin_unlock(&block_group->lock);
845 BTRFS_I(inode)->generation = 0; 862 BTRFS_I(inode)->generation = 0;
846 } 863 }
847 kfree(checksums); 864 kfree(checksums);
848 kfree(pages); 865 kfree(pages);
849 btrfs_update_inode(trans, root, inode); 866 btrfs_update_inode(trans, root, inode);
867 return ret;
868}
869
870int btrfs_write_out_cache(struct btrfs_root *root,
871 struct btrfs_trans_handle *trans,
872 struct btrfs_block_group_cache *block_group,
873 struct btrfs_path *path)
874{
875 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
876 struct inode *inode;
877 int ret = 0;
878
879 root = root->fs_info->tree_root;
880
881 spin_lock(&block_group->lock);
882 if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
883 spin_unlock(&block_group->lock);
884 return 0;
885 }
886 spin_unlock(&block_group->lock);
887
888 inode = lookup_free_space_inode(root, block_group, path);
889 if (IS_ERR(inode))
890 return 0;
891
892 ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
893 path, block_group->key.objectid);
894 if (ret < 0) {
895 spin_lock(&block_group->lock);
896 block_group->disk_cache_state = BTRFS_DC_ERROR;
897 spin_unlock(&block_group->lock);
898 ret = 0;
899
900 printk(KERN_ERR "btrfs: failed to write free space cace "
901 "for block group %llu\n", block_group->key.objectid);
902 }
903
850 iput(inode); 904 iput(inode);
851 return ret; 905 return ret;
852} 906}
853 907
854static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize, 908static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
855 u64 offset) 909 u64 offset)
856{ 910{
857 BUG_ON(offset < bitmap_start); 911 BUG_ON(offset < bitmap_start);
858 offset -= bitmap_start; 912 offset -= bitmap_start;
859 return (unsigned long)(div64_u64(offset, sectorsize)); 913 return (unsigned long)(div_u64(offset, unit));
860} 914}
861 915
862static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize) 916static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
863{ 917{
864 return (unsigned long)(div64_u64(bytes, sectorsize)); 918 return (unsigned long)(div_u64(bytes, unit));
865} 919}
866 920
867static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group, 921static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
868 u64 offset) 922 u64 offset)
869{ 923{
870 u64 bitmap_start; 924 u64 bitmap_start;
871 u64 bytes_per_bitmap; 925 u64 bytes_per_bitmap;
872 926
873 bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize; 927 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
874 bitmap_start = offset - block_group->key.objectid; 928 bitmap_start = offset - ctl->start;
875 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); 929 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
876 bitmap_start *= bytes_per_bitmap; 930 bitmap_start *= bytes_per_bitmap;
877 bitmap_start += block_group->key.objectid; 931 bitmap_start += ctl->start;
878 932
879 return bitmap_start; 933 return bitmap_start;
880} 934}
@@ -932,10 +986,10 @@ static int tree_insert_offset(struct rb_root *root, u64 offset,
932 * offset. 986 * offset.
933 */ 987 */
934static struct btrfs_free_space * 988static struct btrfs_free_space *
935tree_search_offset(struct btrfs_block_group_cache *block_group, 989tree_search_offset(struct btrfs_free_space_ctl *ctl,
936 u64 offset, int bitmap_only, int fuzzy) 990 u64 offset, int bitmap_only, int fuzzy)
937{ 991{
938 struct rb_node *n = block_group->free_space_offset.rb_node; 992 struct rb_node *n = ctl->free_space_offset.rb_node;
939 struct btrfs_free_space *entry, *prev = NULL; 993 struct btrfs_free_space *entry, *prev = NULL;
940 994
941 /* find entry that is closest to the 'offset' */ 995 /* find entry that is closest to the 'offset' */
@@ -1031,8 +1085,7 @@ tree_search_offset(struct btrfs_block_group_cache *block_group,
1031 break; 1085 break;
1032 } 1086 }
1033 } 1087 }
1034 if (entry->offset + BITS_PER_BITMAP * 1088 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
1035 block_group->sectorsize > offset)
1036 return entry; 1089 return entry;
1037 } else if (entry->offset + entry->bytes > offset) 1090 } else if (entry->offset + entry->bytes > offset)
1038 return entry; 1091 return entry;
@@ -1043,7 +1096,7 @@ tree_search_offset(struct btrfs_block_group_cache *block_group,
1043 while (1) { 1096 while (1) {
1044 if (entry->bitmap) { 1097 if (entry->bitmap) {
1045 if (entry->offset + BITS_PER_BITMAP * 1098 if (entry->offset + BITS_PER_BITMAP *
1046 block_group->sectorsize > offset) 1099 ctl->unit > offset)
1047 break; 1100 break;
1048 } else { 1101 } else {
1049 if (entry->offset + entry->bytes > offset) 1102 if (entry->offset + entry->bytes > offset)
@@ -1059,42 +1112,47 @@ tree_search_offset(struct btrfs_block_group_cache *block_group,
1059} 1112}
1060 1113
1061static inline void 1114static inline void
1062__unlink_free_space(struct btrfs_block_group_cache *block_group, 1115__unlink_free_space(struct btrfs_free_space_ctl *ctl,
1063 struct btrfs_free_space *info) 1116 struct btrfs_free_space *info)
1064{ 1117{
1065 rb_erase(&info->offset_index, &block_group->free_space_offset); 1118 rb_erase(&info->offset_index, &ctl->free_space_offset);
1066 block_group->free_extents--; 1119 ctl->free_extents--;
1067} 1120}
1068 1121
1069static void unlink_free_space(struct btrfs_block_group_cache *block_group, 1122static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
1070 struct btrfs_free_space *info) 1123 struct btrfs_free_space *info)
1071{ 1124{
1072 __unlink_free_space(block_group, info); 1125 __unlink_free_space(ctl, info);
1073 block_group->free_space -= info->bytes; 1126 ctl->free_space -= info->bytes;
1074} 1127}
1075 1128
1076static int link_free_space(struct btrfs_block_group_cache *block_group, 1129static int link_free_space(struct btrfs_free_space_ctl *ctl,
1077 struct btrfs_free_space *info) 1130 struct btrfs_free_space *info)
1078{ 1131{
1079 int ret = 0; 1132 int ret = 0;
1080 1133
1081 BUG_ON(!info->bitmap && !info->bytes); 1134 BUG_ON(!info->bitmap && !info->bytes);
1082 ret = tree_insert_offset(&block_group->free_space_offset, info->offset, 1135 ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
1083 &info->offset_index, (info->bitmap != NULL)); 1136 &info->offset_index, (info->bitmap != NULL));
1084 if (ret) 1137 if (ret)
1085 return ret; 1138 return ret;
1086 1139
1087 block_group->free_space += info->bytes; 1140 ctl->free_space += info->bytes;
1088 block_group->free_extents++; 1141 ctl->free_extents++;
1089 return ret; 1142 return ret;
1090} 1143}
1091 1144
1092static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) 1145static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1093{ 1146{
1147 struct btrfs_block_group_cache *block_group = ctl->private;
1094 u64 max_bytes; 1148 u64 max_bytes;
1095 u64 bitmap_bytes; 1149 u64 bitmap_bytes;
1096 u64 extent_bytes; 1150 u64 extent_bytes;
1097 u64 size = block_group->key.offset; 1151 u64 size = block_group->key.offset;
1152 u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
1153 int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
1154
1155 BUG_ON(ctl->total_bitmaps > max_bitmaps);
1098 1156
1099 /* 1157 /*
1100 * The goal is to keep the total amount of memory used per 1gb of space 1158 * The goal is to keep the total amount of memory used per 1gb of space
@@ -1112,10 +1170,10 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
1112 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as 1170 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
1113 * we add more bitmaps. 1171 * we add more bitmaps.
1114 */ 1172 */
1115 bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE; 1173 bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;
1116 1174
1117 if (bitmap_bytes >= max_bytes) { 1175 if (bitmap_bytes >= max_bytes) {
1118 block_group->extents_thresh = 0; 1176 ctl->extents_thresh = 0;
1119 return; 1177 return;
1120 } 1178 }
1121 1179
@@ -1126,47 +1184,43 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
1126 extent_bytes = max_bytes - bitmap_bytes; 1184 extent_bytes = max_bytes - bitmap_bytes;
1127 extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2)); 1185 extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
1128 1186
1129 block_group->extents_thresh = 1187 ctl->extents_thresh =
1130 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); 1188 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
1131} 1189}
1132 1190
1133static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group, 1191static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1134 struct btrfs_free_space *info, u64 offset, 1192 struct btrfs_free_space *info, u64 offset,
1135 u64 bytes) 1193 u64 bytes)
1136{ 1194{
1137 unsigned long start, end; 1195 unsigned long start, count;
1138 unsigned long i;
1139 1196
1140 start = offset_to_bit(info->offset, block_group->sectorsize, offset); 1197 start = offset_to_bit(info->offset, ctl->unit, offset);
1141 end = start + bytes_to_bits(bytes, block_group->sectorsize); 1198 count = bytes_to_bits(bytes, ctl->unit);
1142 BUG_ON(end > BITS_PER_BITMAP); 1199 BUG_ON(start + count > BITS_PER_BITMAP);
1143 1200
1144 for (i = start; i < end; i++) 1201 bitmap_clear(info->bitmap, start, count);
1145 clear_bit(i, info->bitmap);
1146 1202
1147 info->bytes -= bytes; 1203 info->bytes -= bytes;
1148 block_group->free_space -= bytes; 1204 ctl->free_space -= bytes;
1149} 1205}
1150 1206
1151static void bitmap_set_bits(struct btrfs_block_group_cache *block_group, 1207static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1152 struct btrfs_free_space *info, u64 offset, 1208 struct btrfs_free_space *info, u64 offset,
1153 u64 bytes) 1209 u64 bytes)
1154{ 1210{
1155 unsigned long start, end; 1211 unsigned long start, count;
1156 unsigned long i;
1157 1212
1158 start = offset_to_bit(info->offset, block_group->sectorsize, offset); 1213 start = offset_to_bit(info->offset, ctl->unit, offset);
1159 end = start + bytes_to_bits(bytes, block_group->sectorsize); 1214 count = bytes_to_bits(bytes, ctl->unit);
1160 BUG_ON(end > BITS_PER_BITMAP); 1215 BUG_ON(start + count > BITS_PER_BITMAP);
1161 1216
1162 for (i = start; i < end; i++) 1217 bitmap_set(info->bitmap, start, count);
1163 set_bit(i, info->bitmap);
1164 1218
1165 info->bytes += bytes; 1219 info->bytes += bytes;
1166 block_group->free_space += bytes; 1220 ctl->free_space += bytes;
1167} 1221}
1168 1222
1169static int search_bitmap(struct btrfs_block_group_cache *block_group, 1223static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1170 struct btrfs_free_space *bitmap_info, u64 *offset, 1224 struct btrfs_free_space *bitmap_info, u64 *offset,
1171 u64 *bytes) 1225 u64 *bytes)
1172{ 1226{
@@ -1174,9 +1228,9 @@ static int search_bitmap(struct btrfs_block_group_cache *block_group,
1174 unsigned long bits, i; 1228 unsigned long bits, i;
1175 unsigned long next_zero; 1229 unsigned long next_zero;
1176 1230
1177 i = offset_to_bit(bitmap_info->offset, block_group->sectorsize, 1231 i = offset_to_bit(bitmap_info->offset, ctl->unit,
1178 max_t(u64, *offset, bitmap_info->offset)); 1232 max_t(u64, *offset, bitmap_info->offset));
1179 bits = bytes_to_bits(*bytes, block_group->sectorsize); 1233 bits = bytes_to_bits(*bytes, ctl->unit);
1180 1234
1181 for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i); 1235 for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
1182 i < BITS_PER_BITMAP; 1236 i < BITS_PER_BITMAP;
@@ -1191,29 +1245,25 @@ static int search_bitmap(struct btrfs_block_group_cache *block_group,
1191 } 1245 }
1192 1246
1193 if (found_bits) { 1247 if (found_bits) {
1194 *offset = (u64)(i * block_group->sectorsize) + 1248 *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1195 bitmap_info->offset; 1249 *bytes = (u64)(found_bits) * ctl->unit;
1196 *bytes = (u64)(found_bits) * block_group->sectorsize;
1197 return 0; 1250 return 0;
1198 } 1251 }
1199 1252
1200 return -1; 1253 return -1;
1201} 1254}
1202 1255
1203static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache 1256static struct btrfs_free_space *
1204 *block_group, u64 *offset, 1257find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
1205 u64 *bytes, int debug)
1206{ 1258{
1207 struct btrfs_free_space *entry; 1259 struct btrfs_free_space *entry;
1208 struct rb_node *node; 1260 struct rb_node *node;
1209 int ret; 1261 int ret;
1210 1262
1211 if (!block_group->free_space_offset.rb_node) 1263 if (!ctl->free_space_offset.rb_node)
1212 return NULL; 1264 return NULL;
1213 1265
1214 entry = tree_search_offset(block_group, 1266 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1215 offset_to_bitmap(block_group, *offset),
1216 0, 1);
1217 if (!entry) 1267 if (!entry)
1218 return NULL; 1268 return NULL;
1219 1269
@@ -1223,7 +1273,7 @@ static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
1223 continue; 1273 continue;
1224 1274
1225 if (entry->bitmap) { 1275 if (entry->bitmap) {
1226 ret = search_bitmap(block_group, entry, offset, bytes); 1276 ret = search_bitmap(ctl, entry, offset, bytes);
1227 if (!ret) 1277 if (!ret)
1228 return entry; 1278 return entry;
1229 continue; 1279 continue;
@@ -1237,33 +1287,28 @@ static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
1237 return NULL; 1287 return NULL;
1238} 1288}
1239 1289
1240static void add_new_bitmap(struct btrfs_block_group_cache *block_group, 1290static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
1241 struct btrfs_free_space *info, u64 offset) 1291 struct btrfs_free_space *info, u64 offset)
1242{ 1292{
1243 u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize; 1293 info->offset = offset_to_bitmap(ctl, offset);
1244 int max_bitmaps = (int)div64_u64(block_group->key.offset +
1245 bytes_per_bg - 1, bytes_per_bg);
1246 BUG_ON(block_group->total_bitmaps >= max_bitmaps);
1247
1248 info->offset = offset_to_bitmap(block_group, offset);
1249 info->bytes = 0; 1294 info->bytes = 0;
1250 link_free_space(block_group, info); 1295 link_free_space(ctl, info);
1251 block_group->total_bitmaps++; 1296 ctl->total_bitmaps++;
1252 1297
1253 recalculate_thresholds(block_group); 1298 ctl->op->recalc_thresholds(ctl);
1254} 1299}
1255 1300
1256static void free_bitmap(struct btrfs_block_group_cache *block_group, 1301static void free_bitmap(struct btrfs_free_space_ctl *ctl,
1257 struct btrfs_free_space *bitmap_info) 1302 struct btrfs_free_space *bitmap_info)
1258{ 1303{
1259 unlink_free_space(block_group, bitmap_info); 1304 unlink_free_space(ctl, bitmap_info);
1260 kfree(bitmap_info->bitmap); 1305 kfree(bitmap_info->bitmap);
1261 kmem_cache_free(btrfs_free_space_cachep, bitmap_info); 1306 kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1262 block_group->total_bitmaps--; 1307 ctl->total_bitmaps--;
1263 recalculate_thresholds(block_group); 1308 ctl->op->recalc_thresholds(ctl);
1264} 1309}
1265 1310
1266static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group, 1311static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
1267 struct btrfs_free_space *bitmap_info, 1312 struct btrfs_free_space *bitmap_info,
1268 u64 *offset, u64 *bytes) 1313 u64 *offset, u64 *bytes)
1269{ 1314{
@@ -1272,8 +1317,7 @@ static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_gro
1272 int ret; 1317 int ret;
1273 1318
1274again: 1319again:
1275 end = bitmap_info->offset + 1320 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
1276 (u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;
1277 1321
1278 /* 1322 /*
1279 * XXX - this can go away after a few releases. 1323 * XXX - this can go away after a few releases.
@@ -1288,24 +1332,22 @@ again:
1288 search_start = *offset; 1332 search_start = *offset;
1289 search_bytes = *bytes; 1333 search_bytes = *bytes;
1290 search_bytes = min(search_bytes, end - search_start + 1); 1334 search_bytes = min(search_bytes, end - search_start + 1);
1291 ret = search_bitmap(block_group, bitmap_info, &search_start, 1335 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
1292 &search_bytes);
1293 BUG_ON(ret < 0 || search_start != *offset); 1336 BUG_ON(ret < 0 || search_start != *offset);
1294 1337
1295 if (*offset > bitmap_info->offset && *offset + *bytes > end) { 1338 if (*offset > bitmap_info->offset && *offset + *bytes > end) {
1296 bitmap_clear_bits(block_group, bitmap_info, *offset, 1339 bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
1297 end - *offset + 1);
1298 *bytes -= end - *offset + 1; 1340 *bytes -= end - *offset + 1;
1299 *offset = end + 1; 1341 *offset = end + 1;
1300 } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) { 1342 } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
1301 bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes); 1343 bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
1302 *bytes = 0; 1344 *bytes = 0;
1303 } 1345 }
1304 1346
1305 if (*bytes) { 1347 if (*bytes) {
1306 struct rb_node *next = rb_next(&bitmap_info->offset_index); 1348 struct rb_node *next = rb_next(&bitmap_info->offset_index);
1307 if (!bitmap_info->bytes) 1349 if (!bitmap_info->bytes)
1308 free_bitmap(block_group, bitmap_info); 1350 free_bitmap(ctl, bitmap_info);
1309 1351
1310 /* 1352 /*
1311 * no entry after this bitmap, but we still have bytes to 1353 * no entry after this bitmap, but we still have bytes to
@@ -1332,31 +1374,28 @@ again:
1332 */ 1374 */
1333 search_start = *offset; 1375 search_start = *offset;
1334 search_bytes = *bytes; 1376 search_bytes = *bytes;
1335 ret = search_bitmap(block_group, bitmap_info, &search_start, 1377 ret = search_bitmap(ctl, bitmap_info, &search_start,
1336 &search_bytes); 1378 &search_bytes);
1337 if (ret < 0 || search_start != *offset) 1379 if (ret < 0 || search_start != *offset)
1338 return -EAGAIN; 1380 return -EAGAIN;
1339 1381
1340 goto again; 1382 goto again;
1341 } else if (!bitmap_info->bytes) 1383 } else if (!bitmap_info->bytes)
1342 free_bitmap(block_group, bitmap_info); 1384 free_bitmap(ctl, bitmap_info);
1343 1385
1344 return 0; 1386 return 0;
1345} 1387}
1346 1388
1347static int insert_into_bitmap(struct btrfs_block_group_cache *block_group, 1389static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1348 struct btrfs_free_space *info) 1390 struct btrfs_free_space *info)
1349{ 1391{
1350 struct btrfs_free_space *bitmap_info; 1392 struct btrfs_block_group_cache *block_group = ctl->private;
1351 int added = 0;
1352 u64 bytes, offset, end;
1353 int ret;
1354 1393
1355 /* 1394 /*
1356 * If we are below the extents threshold then we can add this as an 1395 * If we are below the extents threshold then we can add this as an
1357 * extent, and don't have to deal with the bitmap 1396 * extent, and don't have to deal with the bitmap
1358 */ 1397 */
1359 if (block_group->free_extents < block_group->extents_thresh) { 1398 if (ctl->free_extents < ctl->extents_thresh) {
1360 /* 1399 /*
1361 * If this block group has some small extents we don't want to 1400 * If this block group has some small extents we don't want to
1362 * use up all of our free slots in the cache with them, we want 1401 * use up all of our free slots in the cache with them, we want
@@ -1365,11 +1404,10 @@ static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
1365 * the overhead of a bitmap if we don't have to. 1404 * the overhead of a bitmap if we don't have to.
1366 */ 1405 */
1367 if (info->bytes <= block_group->sectorsize * 4) { 1406 if (info->bytes <= block_group->sectorsize * 4) {
1368 if (block_group->free_extents * 2 <= 1407 if (ctl->free_extents * 2 <= ctl->extents_thresh)
1369 block_group->extents_thresh) 1408 return false;
1370 return 0;
1371 } else { 1409 } else {
1372 return 0; 1410 return false;
1373 } 1411 }
1374 } 1412 }
1375 1413
@@ -1379,31 +1417,42 @@ static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
1379 */ 1417 */
1380 if (BITS_PER_BITMAP * block_group->sectorsize > 1418 if (BITS_PER_BITMAP * block_group->sectorsize >
1381 block_group->key.offset) 1419 block_group->key.offset)
1382 return 0; 1420 return false;
1421
1422 return true;
1423}
1424
1425static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1426 struct btrfs_free_space *info)
1427{
1428 struct btrfs_free_space *bitmap_info;
1429 int added = 0;
1430 u64 bytes, offset, end;
1431 int ret;
1383 1432
1384 bytes = info->bytes; 1433 bytes = info->bytes;
1385 offset = info->offset; 1434 offset = info->offset;
1386 1435
1436 if (!ctl->op->use_bitmap(ctl, info))
1437 return 0;
1438
1387again: 1439again:
1388 bitmap_info = tree_search_offset(block_group, 1440 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1389 offset_to_bitmap(block_group, offset),
1390 1, 0); 1441 1, 0);
1391 if (!bitmap_info) { 1442 if (!bitmap_info) {
1392 BUG_ON(added); 1443 BUG_ON(added);
1393 goto new_bitmap; 1444 goto new_bitmap;
1394 } 1445 }
1395 1446
1396 end = bitmap_info->offset + 1447 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1397 (u64)(BITS_PER_BITMAP * block_group->sectorsize);
1398 1448
1399 if (offset >= bitmap_info->offset && offset + bytes > end) { 1449 if (offset >= bitmap_info->offset && offset + bytes > end) {
1400 bitmap_set_bits(block_group, bitmap_info, offset, 1450 bitmap_set_bits(ctl, bitmap_info, offset, end - offset);
1401 end - offset);
1402 bytes -= end - offset; 1451 bytes -= end - offset;
1403 offset = end; 1452 offset = end;
1404 added = 0; 1453 added = 0;
1405 } else if (offset >= bitmap_info->offset && offset + bytes <= end) { 1454 } else if (offset >= bitmap_info->offset && offset + bytes <= end) {
1406 bitmap_set_bits(block_group, bitmap_info, offset, bytes); 1455 bitmap_set_bits(ctl, bitmap_info, offset, bytes);
1407 bytes = 0; 1456 bytes = 0;
1408 } else { 1457 } else {
1409 BUG(); 1458 BUG();
@@ -1417,19 +1466,19 @@ again:
1417 1466
1418new_bitmap: 1467new_bitmap:
1419 if (info && info->bitmap) { 1468 if (info && info->bitmap) {
1420 add_new_bitmap(block_group, info, offset); 1469 add_new_bitmap(ctl, info, offset);
1421 added = 1; 1470 added = 1;
1422 info = NULL; 1471 info = NULL;
1423 goto again; 1472 goto again;
1424 } else { 1473 } else {
1425 spin_unlock(&block_group->tree_lock); 1474 spin_unlock(&ctl->tree_lock);
1426 1475
1427 /* no pre-allocated info, allocate a new one */ 1476 /* no pre-allocated info, allocate a new one */
1428 if (!info) { 1477 if (!info) {
1429 info = kmem_cache_zalloc(btrfs_free_space_cachep, 1478 info = kmem_cache_zalloc(btrfs_free_space_cachep,
1430 GFP_NOFS); 1479 GFP_NOFS);
1431 if (!info) { 1480 if (!info) {
1432 spin_lock(&block_group->tree_lock); 1481 spin_lock(&ctl->tree_lock);
1433 ret = -ENOMEM; 1482 ret = -ENOMEM;
1434 goto out; 1483 goto out;
1435 } 1484 }
@@ -1437,7 +1486,7 @@ new_bitmap:
1437 1486
1438 /* allocate the bitmap */ 1487 /* allocate the bitmap */
1439 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 1488 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
1440 spin_lock(&block_group->tree_lock); 1489 spin_lock(&ctl->tree_lock);
1441 if (!info->bitmap) { 1490 if (!info->bitmap) {
1442 ret = -ENOMEM; 1491 ret = -ENOMEM;
1443 goto out; 1492 goto out;
@@ -1455,8 +1504,8 @@ out:
1455 return ret; 1504 return ret;
1456} 1505}
1457 1506
1458static bool try_merge_free_space(struct btrfs_block_group_cache *block_group, 1507static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
1459 struct btrfs_free_space *info, bool update_stat) 1508 struct btrfs_free_space *info, bool update_stat)
1460{ 1509{
1461 struct btrfs_free_space *left_info; 1510 struct btrfs_free_space *left_info;
1462 struct btrfs_free_space *right_info; 1511 struct btrfs_free_space *right_info;
@@ -1469,18 +1518,18 @@ static bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
1469 * are adding, if there is remove that struct and add a new one to 1518 * are adding, if there is remove that struct and add a new one to
1470 * cover the entire range 1519 * cover the entire range
1471 */ 1520 */
1472 right_info = tree_search_offset(block_group, offset + bytes, 0, 0); 1521 right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
1473 if (right_info && rb_prev(&right_info->offset_index)) 1522 if (right_info && rb_prev(&right_info->offset_index))
1474 left_info = rb_entry(rb_prev(&right_info->offset_index), 1523 left_info = rb_entry(rb_prev(&right_info->offset_index),
1475 struct btrfs_free_space, offset_index); 1524 struct btrfs_free_space, offset_index);
1476 else 1525 else
1477 left_info = tree_search_offset(block_group, offset - 1, 0, 0); 1526 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
1478 1527
1479 if (right_info && !right_info->bitmap) { 1528 if (right_info && !right_info->bitmap) {
1480 if (update_stat) 1529 if (update_stat)
1481 unlink_free_space(block_group, right_info); 1530 unlink_free_space(ctl, right_info);
1482 else 1531 else
1483 __unlink_free_space(block_group, right_info); 1532 __unlink_free_space(ctl, right_info);
1484 info->bytes += right_info->bytes; 1533 info->bytes += right_info->bytes;
1485 kmem_cache_free(btrfs_free_space_cachep, right_info); 1534 kmem_cache_free(btrfs_free_space_cachep, right_info);
1486 merged = true; 1535 merged = true;
@@ -1489,9 +1538,9 @@ static bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
1489 if (left_info && !left_info->bitmap && 1538 if (left_info && !left_info->bitmap &&
1490 left_info->offset + left_info->bytes == offset) { 1539 left_info->offset + left_info->bytes == offset) {
1491 if (update_stat) 1540 if (update_stat)
1492 unlink_free_space(block_group, left_info); 1541 unlink_free_space(ctl, left_info);
1493 else 1542 else
1494 __unlink_free_space(block_group, left_info); 1543 __unlink_free_space(ctl, left_info);
1495 info->offset = left_info->offset; 1544 info->offset = left_info->offset;
1496 info->bytes += left_info->bytes; 1545 info->bytes += left_info->bytes;
1497 kmem_cache_free(btrfs_free_space_cachep, left_info); 1546 kmem_cache_free(btrfs_free_space_cachep, left_info);
@@ -1501,8 +1550,8 @@ static bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
1501 return merged; 1550 return merged;
1502} 1551}
1503 1552
1504int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, 1553int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
1505 u64 offset, u64 bytes) 1554 u64 offset, u64 bytes)
1506{ 1555{
1507 struct btrfs_free_space *info; 1556 struct btrfs_free_space *info;
1508 int ret = 0; 1557 int ret = 0;
@@ -1514,9 +1563,9 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
1514 info->offset = offset; 1563 info->offset = offset;
1515 info->bytes = bytes; 1564 info->bytes = bytes;
1516 1565
1517 spin_lock(&block_group->tree_lock); 1566 spin_lock(&ctl->tree_lock);
1518 1567
1519 if (try_merge_free_space(block_group, info, true)) 1568 if (try_merge_free_space(ctl, info, true))
1520 goto link; 1569 goto link;
1521 1570
1522 /* 1571 /*
@@ -1524,7 +1573,7 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
1524 * extent then we know we're going to have to allocate a new extent, so 1573 * extent then we know we're going to have to allocate a new extent, so
1525 * before we do that see if we need to drop this into a bitmap 1574 * before we do that see if we need to drop this into a bitmap
1526 */ 1575 */
1527 ret = insert_into_bitmap(block_group, info); 1576 ret = insert_into_bitmap(ctl, info);
1528 if (ret < 0) { 1577 if (ret < 0) {
1529 goto out; 1578 goto out;
1530 } else if (ret) { 1579 } else if (ret) {
@@ -1532,11 +1581,11 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
1532 goto out; 1581 goto out;
1533 } 1582 }
1534link: 1583link:
1535 ret = link_free_space(block_group, info); 1584 ret = link_free_space(ctl, info);
1536 if (ret) 1585 if (ret)
1537 kmem_cache_free(btrfs_free_space_cachep, info); 1586 kmem_cache_free(btrfs_free_space_cachep, info);
1538out: 1587out:
1539 spin_unlock(&block_group->tree_lock); 1588 spin_unlock(&ctl->tree_lock);
1540 1589
1541 if (ret) { 1590 if (ret) {
1542 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret); 1591 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
@@ -1549,21 +1598,21 @@ out:
1549int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 1598int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1550 u64 offset, u64 bytes) 1599 u64 offset, u64 bytes)
1551{ 1600{
1601 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1552 struct btrfs_free_space *info; 1602 struct btrfs_free_space *info;
1553 struct btrfs_free_space *next_info = NULL; 1603 struct btrfs_free_space *next_info = NULL;
1554 int ret = 0; 1604 int ret = 0;
1555 1605
1556 spin_lock(&block_group->tree_lock); 1606 spin_lock(&ctl->tree_lock);
1557 1607
1558again: 1608again:
1559 info = tree_search_offset(block_group, offset, 0, 0); 1609 info = tree_search_offset(ctl, offset, 0, 0);
1560 if (!info) { 1610 if (!info) {
1561 /* 1611 /*
1562 * oops didn't find an extent that matched the space we wanted 1612 * oops didn't find an extent that matched the space we wanted
1563 * to remove, look for a bitmap instead 1613 * to remove, look for a bitmap instead
1564 */ 1614 */
1565 info = tree_search_offset(block_group, 1615 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1566 offset_to_bitmap(block_group, offset),
1567 1, 0); 1616 1, 0);
1568 if (!info) { 1617 if (!info) {
1569 WARN_ON(1); 1618 WARN_ON(1);
@@ -1578,8 +1627,8 @@ again:
1578 offset_index); 1627 offset_index);
1579 1628
1580 if (next_info->bitmap) 1629 if (next_info->bitmap)
1581 end = next_info->offset + BITS_PER_BITMAP * 1630 end = next_info->offset +
1582 block_group->sectorsize - 1; 1631 BITS_PER_BITMAP * ctl->unit - 1;
1583 else 1632 else
1584 end = next_info->offset + next_info->bytes; 1633 end = next_info->offset + next_info->bytes;
1585 1634
@@ -1599,20 +1648,20 @@ again:
1599 } 1648 }
1600 1649
1601 if (info->bytes == bytes) { 1650 if (info->bytes == bytes) {
1602 unlink_free_space(block_group, info); 1651 unlink_free_space(ctl, info);
1603 if (info->bitmap) { 1652 if (info->bitmap) {
1604 kfree(info->bitmap); 1653 kfree(info->bitmap);
1605 block_group->total_bitmaps--; 1654 ctl->total_bitmaps--;
1606 } 1655 }
1607 kmem_cache_free(btrfs_free_space_cachep, info); 1656 kmem_cache_free(btrfs_free_space_cachep, info);
1608 goto out_lock; 1657 goto out_lock;
1609 } 1658 }
1610 1659
1611 if (!info->bitmap && info->offset == offset) { 1660 if (!info->bitmap && info->offset == offset) {
1612 unlink_free_space(block_group, info); 1661 unlink_free_space(ctl, info);
1613 info->offset += bytes; 1662 info->offset += bytes;
1614 info->bytes -= bytes; 1663 info->bytes -= bytes;
1615 link_free_space(block_group, info); 1664 link_free_space(ctl, info);
1616 goto out_lock; 1665 goto out_lock;
1617 } 1666 }
1618 1667
@@ -1626,13 +1675,13 @@ again:
1626 * first unlink the old info and then 1675 * first unlink the old info and then
1627 * insert it again after the hole we're creating 1676 * insert it again after the hole we're creating
1628 */ 1677 */
1629 unlink_free_space(block_group, info); 1678 unlink_free_space(ctl, info);
1630 if (offset + bytes < info->offset + info->bytes) { 1679 if (offset + bytes < info->offset + info->bytes) {
1631 u64 old_end = info->offset + info->bytes; 1680 u64 old_end = info->offset + info->bytes;
1632 1681
1633 info->offset = offset + bytes; 1682 info->offset = offset + bytes;
1634 info->bytes = old_end - info->offset; 1683 info->bytes = old_end - info->offset;
1635 ret = link_free_space(block_group, info); 1684 ret = link_free_space(ctl, info);
1636 WARN_ON(ret); 1685 WARN_ON(ret);
1637 if (ret) 1686 if (ret)
1638 goto out_lock; 1687 goto out_lock;
@@ -1642,7 +1691,7 @@ again:
1642 */ 1691 */
1643 kmem_cache_free(btrfs_free_space_cachep, info); 1692 kmem_cache_free(btrfs_free_space_cachep, info);
1644 } 1693 }
1645 spin_unlock(&block_group->tree_lock); 1694 spin_unlock(&ctl->tree_lock);
1646 1695
1647 /* step two, insert a new info struct to cover 1696 /* step two, insert a new info struct to cover
1648 * anything before the hole 1697 * anything before the hole
@@ -1653,12 +1702,12 @@ again:
1653 goto out; 1702 goto out;
1654 } 1703 }
1655 1704
1656 ret = remove_from_bitmap(block_group, info, &offset, &bytes); 1705 ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1657 if (ret == -EAGAIN) 1706 if (ret == -EAGAIN)
1658 goto again; 1707 goto again;
1659 BUG_ON(ret); 1708 BUG_ON(ret);
1660out_lock: 1709out_lock:
1661 spin_unlock(&block_group->tree_lock); 1710 spin_unlock(&ctl->tree_lock);
1662out: 1711out:
1663 return ret; 1712 return ret;
1664} 1713}
@@ -1666,11 +1715,12 @@ out:
1666void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 1715void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1667 u64 bytes) 1716 u64 bytes)
1668{ 1717{
1718 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1669 struct btrfs_free_space *info; 1719 struct btrfs_free_space *info;
1670 struct rb_node *n; 1720 struct rb_node *n;
1671 int count = 0; 1721 int count = 0;
1672 1722
1673 for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) { 1723 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
1674 info = rb_entry(n, struct btrfs_free_space, offset_index); 1724 info = rb_entry(n, struct btrfs_free_space, offset_index);
1675 if (info->bytes >= bytes) 1725 if (info->bytes >= bytes)
1676 count++; 1726 count++;
@@ -1685,6 +1735,30 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1685 "\n", count); 1735 "\n", count);
1686} 1736}
1687 1737
1738static struct btrfs_free_space_op free_space_op = {
1739 .recalc_thresholds = recalculate_thresholds,
1740 .use_bitmap = use_bitmap,
1741};
1742
1743void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
1744{
1745 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1746
1747 spin_lock_init(&ctl->tree_lock);
1748 ctl->unit = block_group->sectorsize;
1749 ctl->start = block_group->key.objectid;
1750 ctl->private = block_group;
1751 ctl->op = &free_space_op;
1752
1753 /*
1754 * we only want to have 32k of ram per block group for keeping
1755 * track of free space, and if we pass 1/2 of that we want to
1756 * start converting things over to using bitmaps
1757 */
1758 ctl->extents_thresh = ((1024 * 32) / 2) /
1759 sizeof(struct btrfs_free_space);
1760}
1761
1688/* 1762/*
1689 * for a given cluster, put all of its extents back into the free 1763 * for a given cluster, put all of its extents back into the free
1690 * space cache. If the block group passed doesn't match the block group 1764 * space cache. If the block group passed doesn't match the block group
@@ -1696,6 +1770,7 @@ __btrfs_return_cluster_to_free_space(
1696 struct btrfs_block_group_cache *block_group, 1770 struct btrfs_block_group_cache *block_group,
1697 struct btrfs_free_cluster *cluster) 1771 struct btrfs_free_cluster *cluster)
1698{ 1772{
1773 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1699 struct btrfs_free_space *entry; 1774 struct btrfs_free_space *entry;
1700 struct rb_node *node; 1775 struct rb_node *node;
1701 1776
@@ -1717,8 +1792,8 @@ __btrfs_return_cluster_to_free_space(
1717 1792
1718 bitmap = (entry->bitmap != NULL); 1793 bitmap = (entry->bitmap != NULL);
1719 if (!bitmap) 1794 if (!bitmap)
1720 try_merge_free_space(block_group, entry, false); 1795 try_merge_free_space(ctl, entry, false);
1721 tree_insert_offset(&block_group->free_space_offset, 1796 tree_insert_offset(&ctl->free_space_offset,
1722 entry->offset, &entry->offset_index, bitmap); 1797 entry->offset, &entry->offset_index, bitmap);
1723 } 1798 }
1724 cluster->root = RB_ROOT; 1799 cluster->root = RB_ROOT;
@@ -1729,14 +1804,38 @@ out:
1729 return 0; 1804 return 0;
1730} 1805}
1731 1806
1732void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) 1807void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
1733{ 1808{
1734 struct btrfs_free_space *info; 1809 struct btrfs_free_space *info;
1735 struct rb_node *node; 1810 struct rb_node *node;
1811
1812 while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
1813 info = rb_entry(node, struct btrfs_free_space, offset_index);
1814 unlink_free_space(ctl, info);
1815 kfree(info->bitmap);
1816 kmem_cache_free(btrfs_free_space_cachep, info);
1817 if (need_resched()) {
1818 spin_unlock(&ctl->tree_lock);
1819 cond_resched();
1820 spin_lock(&ctl->tree_lock);
1821 }
1822 }
1823}
1824
1825void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
1826{
1827 spin_lock(&ctl->tree_lock);
1828 __btrfs_remove_free_space_cache_locked(ctl);
1829 spin_unlock(&ctl->tree_lock);
1830}
1831
1832void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
1833{
1834 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1736 struct btrfs_free_cluster *cluster; 1835 struct btrfs_free_cluster *cluster;
1737 struct list_head *head; 1836 struct list_head *head;
1738 1837
1739 spin_lock(&block_group->tree_lock); 1838 spin_lock(&ctl->tree_lock);
1740 while ((head = block_group->cluster_list.next) != 1839 while ((head = block_group->cluster_list.next) !=
1741 &block_group->cluster_list) { 1840 &block_group->cluster_list) {
1742 cluster = list_entry(head, struct btrfs_free_cluster, 1841 cluster = list_entry(head, struct btrfs_free_cluster,
@@ -1745,60 +1844,46 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
1745 WARN_ON(cluster->block_group != block_group); 1844 WARN_ON(cluster->block_group != block_group);
1746 __btrfs_return_cluster_to_free_space(block_group, cluster); 1845 __btrfs_return_cluster_to_free_space(block_group, cluster);
1747 if (need_resched()) { 1846 if (need_resched()) {
1748 spin_unlock(&block_group->tree_lock); 1847 spin_unlock(&ctl->tree_lock);
1749 cond_resched(); 1848 cond_resched();
1750 spin_lock(&block_group->tree_lock); 1849 spin_lock(&ctl->tree_lock);
1751 } 1850 }
1752 } 1851 }
1852 __btrfs_remove_free_space_cache_locked(ctl);
1853 spin_unlock(&ctl->tree_lock);
1753 1854
1754 while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
1755 info = rb_entry(node, struct btrfs_free_space, offset_index);
1756 if (!info->bitmap) {
1757 unlink_free_space(block_group, info);
1758 kmem_cache_free(btrfs_free_space_cachep, info);
1759 } else {
1760 free_bitmap(block_group, info);
1761 }
1762
1763 if (need_resched()) {
1764 spin_unlock(&block_group->tree_lock);
1765 cond_resched();
1766 spin_lock(&block_group->tree_lock);
1767 }
1768 }
1769
1770 spin_unlock(&block_group->tree_lock);
1771} 1855}
1772 1856
1773u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 1857u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
1774 u64 offset, u64 bytes, u64 empty_size) 1858 u64 offset, u64 bytes, u64 empty_size)
1775{ 1859{
1860 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1776 struct btrfs_free_space *entry = NULL; 1861 struct btrfs_free_space *entry = NULL;
1777 u64 bytes_search = bytes + empty_size; 1862 u64 bytes_search = bytes + empty_size;
1778 u64 ret = 0; 1863 u64 ret = 0;
1779 1864
1780 spin_lock(&block_group->tree_lock); 1865 spin_lock(&ctl->tree_lock);
1781 entry = find_free_space(block_group, &offset, &bytes_search, 0); 1866 entry = find_free_space(ctl, &offset, &bytes_search);
1782 if (!entry) 1867 if (!entry)
1783 goto out; 1868 goto out;
1784 1869
1785 ret = offset; 1870 ret = offset;
1786 if (entry->bitmap) { 1871 if (entry->bitmap) {
1787 bitmap_clear_bits(block_group, entry, offset, bytes); 1872 bitmap_clear_bits(ctl, entry, offset, bytes);
1788 if (!entry->bytes) 1873 if (!entry->bytes)
1789 free_bitmap(block_group, entry); 1874 free_bitmap(ctl, entry);
1790 } else { 1875 } else {
1791 unlink_free_space(block_group, entry); 1876 unlink_free_space(ctl, entry);
1792 entry->offset += bytes; 1877 entry->offset += bytes;
1793 entry->bytes -= bytes; 1878 entry->bytes -= bytes;
1794 if (!entry->bytes) 1879 if (!entry->bytes)
1795 kmem_cache_free(btrfs_free_space_cachep, entry); 1880 kmem_cache_free(btrfs_free_space_cachep, entry);
1796 else 1881 else
1797 link_free_space(block_group, entry); 1882 link_free_space(ctl, entry);
1798 } 1883 }
1799 1884
1800out: 1885out:
1801 spin_unlock(&block_group->tree_lock); 1886 spin_unlock(&ctl->tree_lock);
1802 1887
1803 return ret; 1888 return ret;
1804} 1889}
@@ -1815,6 +1900,7 @@ int btrfs_return_cluster_to_free_space(
1815 struct btrfs_block_group_cache *block_group, 1900 struct btrfs_block_group_cache *block_group,
1816 struct btrfs_free_cluster *cluster) 1901 struct btrfs_free_cluster *cluster)
1817{ 1902{
1903 struct btrfs_free_space_ctl *ctl;
1818 int ret; 1904 int ret;
1819 1905
1820 /* first, get a safe pointer to the block group */ 1906 /* first, get a safe pointer to the block group */
@@ -1833,10 +1919,12 @@ int btrfs_return_cluster_to_free_space(
1833 atomic_inc(&block_group->count); 1919 atomic_inc(&block_group->count);
1834 spin_unlock(&cluster->lock); 1920 spin_unlock(&cluster->lock);
1835 1921
1922 ctl = block_group->free_space_ctl;
1923
1836 /* now return any extents the cluster had on it */ 1924 /* now return any extents the cluster had on it */
1837 spin_lock(&block_group->tree_lock); 1925 spin_lock(&ctl->tree_lock);
1838 ret = __btrfs_return_cluster_to_free_space(block_group, cluster); 1926 ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
1839 spin_unlock(&block_group->tree_lock); 1927 spin_unlock(&ctl->tree_lock);
1840 1928
1841 /* finally drop our ref */ 1929 /* finally drop our ref */
1842 btrfs_put_block_group(block_group); 1930 btrfs_put_block_group(block_group);
@@ -1848,6 +1936,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
1848 struct btrfs_free_space *entry, 1936 struct btrfs_free_space *entry,
1849 u64 bytes, u64 min_start) 1937 u64 bytes, u64 min_start)
1850{ 1938{
1939 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1851 int err; 1940 int err;
1852 u64 search_start = cluster->window_start; 1941 u64 search_start = cluster->window_start;
1853 u64 search_bytes = bytes; 1942 u64 search_bytes = bytes;
@@ -1856,13 +1945,12 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
1856 search_start = min_start; 1945 search_start = min_start;
1857 search_bytes = bytes; 1946 search_bytes = bytes;
1858 1947
1859 err = search_bitmap(block_group, entry, &search_start, 1948 err = search_bitmap(ctl, entry, &search_start, &search_bytes);
1860 &search_bytes);
1861 if (err) 1949 if (err)
1862 return 0; 1950 return 0;
1863 1951
1864 ret = search_start; 1952 ret = search_start;
1865 bitmap_clear_bits(block_group, entry, ret, bytes); 1953 bitmap_clear_bits(ctl, entry, ret, bytes);
1866 1954
1867 return ret; 1955 return ret;
1868} 1956}
@@ -1876,6 +1964,7 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
1876 struct btrfs_free_cluster *cluster, u64 bytes, 1964 struct btrfs_free_cluster *cluster, u64 bytes,
1877 u64 min_start) 1965 u64 min_start)
1878{ 1966{
1967 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1879 struct btrfs_free_space *entry = NULL; 1968 struct btrfs_free_space *entry = NULL;
1880 struct rb_node *node; 1969 struct rb_node *node;
1881 u64 ret = 0; 1970 u64 ret = 0;
@@ -1933,20 +2022,20 @@ out:
1933 if (!ret) 2022 if (!ret)
1934 return 0; 2023 return 0;
1935 2024
1936 spin_lock(&block_group->tree_lock); 2025 spin_lock(&ctl->tree_lock);
1937 2026
1938 block_group->free_space -= bytes; 2027 ctl->free_space -= bytes;
1939 if (entry->bytes == 0) { 2028 if (entry->bytes == 0) {
1940 block_group->free_extents--; 2029 ctl->free_extents--;
1941 if (entry->bitmap) { 2030 if (entry->bitmap) {
1942 kfree(entry->bitmap); 2031 kfree(entry->bitmap);
1943 block_group->total_bitmaps--; 2032 ctl->total_bitmaps--;
1944 recalculate_thresholds(block_group); 2033 ctl->op->recalc_thresholds(ctl);
1945 } 2034 }
1946 kmem_cache_free(btrfs_free_space_cachep, entry); 2035 kmem_cache_free(btrfs_free_space_cachep, entry);
1947 } 2036 }
1948 2037
1949 spin_unlock(&block_group->tree_lock); 2038 spin_unlock(&ctl->tree_lock);
1950 2039
1951 return ret; 2040 return ret;
1952} 2041}
@@ -1956,6 +2045,7 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
1956 struct btrfs_free_cluster *cluster, 2045 struct btrfs_free_cluster *cluster,
1957 u64 offset, u64 bytes, u64 min_bytes) 2046 u64 offset, u64 bytes, u64 min_bytes)
1958{ 2047{
2048 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1959 unsigned long next_zero; 2049 unsigned long next_zero;
1960 unsigned long i; 2050 unsigned long i;
1961 unsigned long search_bits; 2051 unsigned long search_bits;
@@ -2010,7 +2100,7 @@ again:
2010 2100
2011 cluster->window_start = start * block_group->sectorsize + 2101 cluster->window_start = start * block_group->sectorsize +
2012 entry->offset; 2102 entry->offset;
2013 rb_erase(&entry->offset_index, &block_group->free_space_offset); 2103 rb_erase(&entry->offset_index, &ctl->free_space_offset);
2014 ret = tree_insert_offset(&cluster->root, entry->offset, 2104 ret = tree_insert_offset(&cluster->root, entry->offset,
2015 &entry->offset_index, 1); 2105 &entry->offset_index, 1);
2016 BUG_ON(ret); 2106 BUG_ON(ret);
@@ -2025,6 +2115,7 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2025 struct btrfs_free_cluster *cluster, 2115 struct btrfs_free_cluster *cluster,
2026 u64 offset, u64 bytes, u64 min_bytes) 2116 u64 offset, u64 bytes, u64 min_bytes)
2027{ 2117{
2118 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2028 struct btrfs_free_space *first = NULL; 2119 struct btrfs_free_space *first = NULL;
2029 struct btrfs_free_space *entry = NULL; 2120 struct btrfs_free_space *entry = NULL;
2030 struct btrfs_free_space *prev = NULL; 2121 struct btrfs_free_space *prev = NULL;
@@ -2035,7 +2126,7 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2035 u64 max_extent; 2126 u64 max_extent;
2036 u64 max_gap = 128 * 1024; 2127 u64 max_gap = 128 * 1024;
2037 2128
2038 entry = tree_search_offset(block_group, offset, 0, 1); 2129 entry = tree_search_offset(ctl, offset, 0, 1);
2039 if (!entry) 2130 if (!entry)
2040 return -ENOSPC; 2131 return -ENOSPC;
2041 2132
@@ -2101,7 +2192,7 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2101 if (entry->bitmap) 2192 if (entry->bitmap)
2102 continue; 2193 continue;
2103 2194
2104 rb_erase(&entry->offset_index, &block_group->free_space_offset); 2195 rb_erase(&entry->offset_index, &ctl->free_space_offset);
2105 ret = tree_insert_offset(&cluster->root, entry->offset, 2196 ret = tree_insert_offset(&cluster->root, entry->offset,
2106 &entry->offset_index, 0); 2197 &entry->offset_index, 0);
2107 BUG_ON(ret); 2198 BUG_ON(ret);
@@ -2120,16 +2211,15 @@ static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2120 struct btrfs_free_cluster *cluster, 2211 struct btrfs_free_cluster *cluster,
2121 u64 offset, u64 bytes, u64 min_bytes) 2212 u64 offset, u64 bytes, u64 min_bytes)
2122{ 2213{
2214 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2123 struct btrfs_free_space *entry; 2215 struct btrfs_free_space *entry;
2124 struct rb_node *node; 2216 struct rb_node *node;
2125 int ret = -ENOSPC; 2217 int ret = -ENOSPC;
2126 2218
2127 if (block_group->total_bitmaps == 0) 2219 if (ctl->total_bitmaps == 0)
2128 return -ENOSPC; 2220 return -ENOSPC;
2129 2221
2130 entry = tree_search_offset(block_group, 2222 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
2131 offset_to_bitmap(block_group, offset),
2132 0, 1);
2133 if (!entry) 2223 if (!entry)
2134 return -ENOSPC; 2224 return -ENOSPC;
2135 2225
@@ -2162,6 +2252,7 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2162 struct btrfs_free_cluster *cluster, 2252 struct btrfs_free_cluster *cluster,
2163 u64 offset, u64 bytes, u64 empty_size) 2253 u64 offset, u64 bytes, u64 empty_size)
2164{ 2254{
2255 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2165 u64 min_bytes; 2256 u64 min_bytes;
2166 int ret; 2257 int ret;
2167 2258
@@ -2181,14 +2272,14 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2181 } else 2272 } else
2182 min_bytes = max(bytes, (bytes + empty_size) >> 2); 2273 min_bytes = max(bytes, (bytes + empty_size) >> 2);
2183 2274
2184 spin_lock(&block_group->tree_lock); 2275 spin_lock(&ctl->tree_lock);
2185 2276
2186 /* 2277 /*
2187 * If we know we don't have enough space to make a cluster don't even 2278 * If we know we don't have enough space to make a cluster don't even
2188 * bother doing all the work to try and find one. 2279 * bother doing all the work to try and find one.
2189 */ 2280 */
2190 if (block_group->free_space < min_bytes) { 2281 if (ctl->free_space < min_bytes) {
2191 spin_unlock(&block_group->tree_lock); 2282 spin_unlock(&ctl->tree_lock);
2192 return -ENOSPC; 2283 return -ENOSPC;
2193 } 2284 }
2194 2285
@@ -2214,7 +2305,7 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2214 } 2305 }
2215out: 2306out:
2216 spin_unlock(&cluster->lock); 2307 spin_unlock(&cluster->lock);
2217 spin_unlock(&block_group->tree_lock); 2308 spin_unlock(&ctl->tree_lock);
2218 2309
2219 return ret; 2310 return ret;
2220} 2311}
@@ -2235,6 +2326,7 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
2235int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, 2326int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2236 u64 *trimmed, u64 start, u64 end, u64 minlen) 2327 u64 *trimmed, u64 start, u64 end, u64 minlen)
2237{ 2328{
2329 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2238 struct btrfs_free_space *entry = NULL; 2330 struct btrfs_free_space *entry = NULL;
2239 struct btrfs_fs_info *fs_info = block_group->fs_info; 2331 struct btrfs_fs_info *fs_info = block_group->fs_info;
2240 u64 bytes = 0; 2332 u64 bytes = 0;
@@ -2244,52 +2336,50 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2244 *trimmed = 0; 2336 *trimmed = 0;
2245 2337
2246 while (start < end) { 2338 while (start < end) {
2247 spin_lock(&block_group->tree_lock); 2339 spin_lock(&ctl->tree_lock);
2248 2340
2249 if (block_group->free_space < minlen) { 2341 if (ctl->free_space < minlen) {
2250 spin_unlock(&block_group->tree_lock); 2342 spin_unlock(&ctl->tree_lock);
2251 break; 2343 break;
2252 } 2344 }
2253 2345
2254 entry = tree_search_offset(block_group, start, 0, 1); 2346 entry = tree_search_offset(ctl, start, 0, 1);
2255 if (!entry) 2347 if (!entry)
2256 entry = tree_search_offset(block_group, 2348 entry = tree_search_offset(ctl,
2257 offset_to_bitmap(block_group, 2349 offset_to_bitmap(ctl, start),
2258 start),
2259 1, 1); 2350 1, 1);
2260 2351
2261 if (!entry || entry->offset >= end) { 2352 if (!entry || entry->offset >= end) {
2262 spin_unlock(&block_group->tree_lock); 2353 spin_unlock(&ctl->tree_lock);
2263 break; 2354 break;
2264 } 2355 }
2265 2356
2266 if (entry->bitmap) { 2357 if (entry->bitmap) {
2267 ret = search_bitmap(block_group, entry, &start, &bytes); 2358 ret = search_bitmap(ctl, entry, &start, &bytes);
2268 if (!ret) { 2359 if (!ret) {
2269 if (start >= end) { 2360 if (start >= end) {
2270 spin_unlock(&block_group->tree_lock); 2361 spin_unlock(&ctl->tree_lock);
2271 break; 2362 break;
2272 } 2363 }
2273 bytes = min(bytes, end - start); 2364 bytes = min(bytes, end - start);
2274 bitmap_clear_bits(block_group, entry, 2365 bitmap_clear_bits(ctl, entry, start, bytes);
2275 start, bytes);
2276 if (entry->bytes == 0) 2366 if (entry->bytes == 0)
2277 free_bitmap(block_group, entry); 2367 free_bitmap(ctl, entry);
2278 } else { 2368 } else {
2279 start = entry->offset + BITS_PER_BITMAP * 2369 start = entry->offset + BITS_PER_BITMAP *
2280 block_group->sectorsize; 2370 block_group->sectorsize;
2281 spin_unlock(&block_group->tree_lock); 2371 spin_unlock(&ctl->tree_lock);
2282 ret = 0; 2372 ret = 0;
2283 continue; 2373 continue;
2284 } 2374 }
2285 } else { 2375 } else {
2286 start = entry->offset; 2376 start = entry->offset;
2287 bytes = min(entry->bytes, end - start); 2377 bytes = min(entry->bytes, end - start);
2288 unlink_free_space(block_group, entry); 2378 unlink_free_space(ctl, entry);
2289 kmem_cache_free(btrfs_free_space_cachep, entry); 2379 kmem_cache_free(btrfs_free_space_cachep, entry);
2290 } 2380 }
2291 2381
2292 spin_unlock(&block_group->tree_lock); 2382 spin_unlock(&ctl->tree_lock);
2293 2383
2294 if (bytes >= minlen) { 2384 if (bytes >= minlen) {
2295 int update_ret; 2385 int update_ret;
@@ -2301,8 +2391,7 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2301 bytes, 2391 bytes,
2302 &actually_trimmed); 2392 &actually_trimmed);
2303 2393
2304 btrfs_add_free_space(block_group, 2394 btrfs_add_free_space(block_group, start, bytes);
2305 start, bytes);
2306 if (!update_ret) 2395 if (!update_ret)
2307 btrfs_update_reserved_bytes(block_group, 2396 btrfs_update_reserved_bytes(block_group,
2308 bytes, 0, 1); 2397 bytes, 0, 1);
@@ -2324,3 +2413,145 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2324 2413
2325 return ret; 2414 return ret;
2326} 2415}
2416
2417/*
2418 * Find the left-most item in the cache tree, and then return the
2419 * smallest inode number in the item.
2420 *
2421 * Note: the returned inode number may not be the smallest one in
2422 * the tree, if the left-most item is a bitmap.
2423 */
2424u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
2425{
2426 struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
2427 struct btrfs_free_space *entry = NULL;
2428 u64 ino = 0;
2429
2430 spin_lock(&ctl->tree_lock);
2431
2432 if (RB_EMPTY_ROOT(&ctl->free_space_offset))
2433 goto out;
2434
2435 entry = rb_entry(rb_first(&ctl->free_space_offset),
2436 struct btrfs_free_space, offset_index);
2437
2438 if (!entry->bitmap) {
2439 ino = entry->offset;
2440
2441 unlink_free_space(ctl, entry);
2442 entry->offset++;
2443 entry->bytes--;
2444 if (!entry->bytes)
2445 kmem_cache_free(btrfs_free_space_cachep, entry);
2446 else
2447 link_free_space(ctl, entry);
2448 } else {
2449 u64 offset = 0;
2450 u64 count = 1;
2451 int ret;
2452
2453 ret = search_bitmap(ctl, entry, &offset, &count);
2454 BUG_ON(ret);
2455
2456 ino = offset;
2457 bitmap_clear_bits(ctl, entry, offset, 1);
2458 if (entry->bytes == 0)
2459 free_bitmap(ctl, entry);
2460 }
2461out:
2462 spin_unlock(&ctl->tree_lock);
2463
2464 return ino;
2465}
2466
2467struct inode *lookup_free_ino_inode(struct btrfs_root *root,
2468 struct btrfs_path *path)
2469{
2470 struct inode *inode = NULL;
2471
2472 spin_lock(&root->cache_lock);
2473 if (root->cache_inode)
2474 inode = igrab(root->cache_inode);
2475 spin_unlock(&root->cache_lock);
2476 if (inode)
2477 return inode;
2478
2479 inode = __lookup_free_space_inode(root, path, 0);
2480 if (IS_ERR(inode))
2481 return inode;
2482
2483 spin_lock(&root->cache_lock);
2484 if (!root->fs_info->closing)
2485 root->cache_inode = igrab(inode);
2486 spin_unlock(&root->cache_lock);
2487
2488 return inode;
2489}
2490
2491int create_free_ino_inode(struct btrfs_root *root,
2492 struct btrfs_trans_handle *trans,
2493 struct btrfs_path *path)
2494{
2495 return __create_free_space_inode(root, trans, path,
2496 BTRFS_FREE_INO_OBJECTID, 0);
2497}
2498
2499int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2500{
2501 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2502 struct btrfs_path *path;
2503 struct inode *inode;
2504 int ret = 0;
2505 u64 root_gen = btrfs_root_generation(&root->root_item);
2506
2507 /*
2508 * If we're unmounting then just return, since this does a search on the
2509 * normal root and not the commit root and we could deadlock.
2510 */
2511 smp_mb();
2512 if (fs_info->closing)
2513 return 0;
2514
2515 path = btrfs_alloc_path();
2516 if (!path)
2517 return 0;
2518
2519 inode = lookup_free_ino_inode(root, path);
2520 if (IS_ERR(inode))
2521 goto out;
2522
2523 if (root_gen != BTRFS_I(inode)->generation)
2524 goto out_put;
2525
2526 ret = __load_free_space_cache(root, inode, ctl, path, 0);
2527
2528 if (ret < 0)
2529 printk(KERN_ERR "btrfs: failed to load free ino cache for "
2530 "root %llu\n", root->root_key.objectid);
2531out_put:
2532 iput(inode);
2533out:
2534 btrfs_free_path(path);
2535 return ret;
2536}
2537
2538int btrfs_write_out_ino_cache(struct btrfs_root *root,
2539 struct btrfs_trans_handle *trans,
2540 struct btrfs_path *path)
2541{
2542 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2543 struct inode *inode;
2544 int ret;
2545
2546 inode = lookup_free_ino_inode(root, path);
2547 if (IS_ERR(inode))
2548 return 0;
2549
2550 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
2551 if (ret < 0)
2552 printk(KERN_ERR "btrfs: failed to write free ino cache "
2553 "for root %llu\n", root->root_key.objectid);
2554
2555 iput(inode);
2556 return ret;
2557}
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 12b2b5165f8..8f2613f779e 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -27,6 +27,25 @@ struct btrfs_free_space {
27 struct list_head list; 27 struct list_head list;
28}; 28};
29 29
30struct btrfs_free_space_ctl {
31 spinlock_t tree_lock;
32 struct rb_root free_space_offset;
33 u64 free_space;
34 int extents_thresh;
35 int free_extents;
36 int total_bitmaps;
37 int unit;
38 u64 start;
39 struct btrfs_free_space_op *op;
40 void *private;
41};
42
43struct btrfs_free_space_op {
44 void (*recalc_thresholds)(struct btrfs_free_space_ctl *ctl);
45 bool (*use_bitmap)(struct btrfs_free_space_ctl *ctl,
46 struct btrfs_free_space *info);
47};
48
30struct inode *lookup_free_space_inode(struct btrfs_root *root, 49struct inode *lookup_free_space_inode(struct btrfs_root *root,
31 struct btrfs_block_group_cache 50 struct btrfs_block_group_cache
32 *block_group, struct btrfs_path *path); 51 *block_group, struct btrfs_path *path);
@@ -45,14 +64,36 @@ int btrfs_write_out_cache(struct btrfs_root *root,
45 struct btrfs_trans_handle *trans, 64 struct btrfs_trans_handle *trans,
46 struct btrfs_block_group_cache *block_group, 65 struct btrfs_block_group_cache *block_group,
47 struct btrfs_path *path); 66 struct btrfs_path *path);
48int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, 67
49 u64 bytenr, u64 size); 68struct inode *lookup_free_ino_inode(struct btrfs_root *root,
69 struct btrfs_path *path);
70int create_free_ino_inode(struct btrfs_root *root,
71 struct btrfs_trans_handle *trans,
72 struct btrfs_path *path);
73int load_free_ino_cache(struct btrfs_fs_info *fs_info,
74 struct btrfs_root *root);
75int btrfs_write_out_ino_cache(struct btrfs_root *root,
76 struct btrfs_trans_handle *trans,
77 struct btrfs_path *path);
78
79void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group);
80int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
81 u64 bytenr, u64 size);
82static inline int
83btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
84 u64 bytenr, u64 size)
85{
86 return __btrfs_add_free_space(block_group->free_space_ctl,
87 bytenr, size);
88}
50int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 89int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
51 u64 bytenr, u64 size); 90 u64 bytenr, u64 size);
91void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl);
52void btrfs_remove_free_space_cache(struct btrfs_block_group_cache 92void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
53 *block_group); 93 *block_group);
54u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 94u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
55 u64 offset, u64 bytes, u64 empty_size); 95 u64 offset, u64 bytes, u64 empty_size);
96u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root);
56void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 97void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
57 u64 bytes); 98 u64 bytes);
58int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, 99int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index c05a08f4c41..00097051262 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -16,11 +16,430 @@
16 * Boston, MA 021110-1307, USA. 16 * Boston, MA 021110-1307, USA.
17 */ 17 */
18 18
19#include <linux/delay.h>
20#include <linux/kthread.h>
21#include <linux/pagemap.h>
22
19#include "ctree.h" 23#include "ctree.h"
20#include "disk-io.h" 24#include "disk-io.h"
25#include "free-space-cache.h"
26#include "inode-map.h"
21#include "transaction.h" 27#include "transaction.h"
22 28
23int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) 29static int caching_kthread(void *data)
30{
31 struct btrfs_root *root = data;
32 struct btrfs_fs_info *fs_info = root->fs_info;
33 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
34 struct btrfs_key key;
35 struct btrfs_path *path;
36 struct extent_buffer *leaf;
37 u64 last = (u64)-1;
38 int slot;
39 int ret;
40
41 path = btrfs_alloc_path();
42 if (!path)
43 return -ENOMEM;
44
45 /* Since the commit root is read-only, we can safely skip locking. */
46 path->skip_locking = 1;
47 path->search_commit_root = 1;
48 path->reada = 2;
49
50 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
51 key.offset = 0;
52 key.type = BTRFS_INODE_ITEM_KEY;
53again:
54 /* need to make sure the commit_root doesn't disappear */
55 mutex_lock(&root->fs_commit_mutex);
56
57 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
58 if (ret < 0)
59 goto out;
60
61 while (1) {
62 smp_mb();
63 if (fs_info->closing > 1)
64 goto out;
65
66 leaf = path->nodes[0];
67 slot = path->slots[0];
68 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
69 ret = btrfs_next_leaf(root, path);
70 if (ret < 0)
71 goto out;
72 else if (ret > 0)
73 break;
74
75 if (need_resched() ||
76 btrfs_transaction_in_commit(fs_info)) {
77 leaf = path->nodes[0];
78
79 if (btrfs_header_nritems(leaf) == 0) {
80 WARN_ON(1);
81 break;
82 }
83
84 /*
85 * Save the key so we can advances forward
86 * in the next search.
87 */
88 btrfs_item_key_to_cpu(leaf, &key, 0);
89 btrfs_release_path(path);
90 root->cache_progress = last;
91 mutex_unlock(&root->fs_commit_mutex);
92 schedule_timeout(1);
93 goto again;
94 } else
95 continue;
96 }
97
98 btrfs_item_key_to_cpu(leaf, &key, slot);
99
100 if (key.type != BTRFS_INODE_ITEM_KEY)
101 goto next;
102
103 if (key.objectid >= BTRFS_LAST_FREE_OBJECTID)
104 break;
105
106 if (last != (u64)-1 && last + 1 != key.objectid) {
107 __btrfs_add_free_space(ctl, last + 1,
108 key.objectid - last - 1);
109 wake_up(&root->cache_wait);
110 }
111
112 last = key.objectid;
113next:
114 path->slots[0]++;
115 }
116
117 if (last < BTRFS_LAST_FREE_OBJECTID - 1) {
118 __btrfs_add_free_space(ctl, last + 1,
119 BTRFS_LAST_FREE_OBJECTID - last - 1);
120 }
121
122 spin_lock(&root->cache_lock);
123 root->cached = BTRFS_CACHE_FINISHED;
124 spin_unlock(&root->cache_lock);
125
126 root->cache_progress = (u64)-1;
127 btrfs_unpin_free_ino(root);
128out:
129 wake_up(&root->cache_wait);
130 mutex_unlock(&root->fs_commit_mutex);
131
132 btrfs_free_path(path);
133
134 return ret;
135}
136
137static void start_caching(struct btrfs_root *root)
138{
139 struct task_struct *tsk;
140 int ret;
141
142 spin_lock(&root->cache_lock);
143 if (root->cached != BTRFS_CACHE_NO) {
144 spin_unlock(&root->cache_lock);
145 return;
146 }
147
148 root->cached = BTRFS_CACHE_STARTED;
149 spin_unlock(&root->cache_lock);
150
151 ret = load_free_ino_cache(root->fs_info, root);
152 if (ret == 1) {
153 spin_lock(&root->cache_lock);
154 root->cached = BTRFS_CACHE_FINISHED;
155 spin_unlock(&root->cache_lock);
156 return;
157 }
158
159 tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n",
160 root->root_key.objectid);
161 BUG_ON(IS_ERR(tsk));
162}
163
164int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
165{
166again:
167 *objectid = btrfs_find_ino_for_alloc(root);
168
169 if (*objectid != 0)
170 return 0;
171
172 start_caching(root);
173
174 wait_event(root->cache_wait,
175 root->cached == BTRFS_CACHE_FINISHED ||
176 root->free_ino_ctl->free_space > 0);
177
178 if (root->cached == BTRFS_CACHE_FINISHED &&
179 root->free_ino_ctl->free_space == 0)
180 return -ENOSPC;
181 else
182 goto again;
183}
184
185void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
186{
187 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
188 struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
189again:
190 if (root->cached == BTRFS_CACHE_FINISHED) {
191 __btrfs_add_free_space(ctl, objectid, 1);
192 } else {
193 /*
194 * If we are in the process of caching free ino chunks,
195 * to avoid adding the same inode number to the free_ino
196 * tree twice due to cross transaction, we'll leave it
197 * in the pinned tree until a transaction is committed
198 * or the caching work is done.
199 */
200
201 mutex_lock(&root->fs_commit_mutex);
202 spin_lock(&root->cache_lock);
203 if (root->cached == BTRFS_CACHE_FINISHED) {
204 spin_unlock(&root->cache_lock);
205 mutex_unlock(&root->fs_commit_mutex);
206 goto again;
207 }
208 spin_unlock(&root->cache_lock);
209
210 start_caching(root);
211
212 if (objectid <= root->cache_progress)
213 __btrfs_add_free_space(ctl, objectid, 1);
214 else
215 __btrfs_add_free_space(pinned, objectid, 1);
216
217 mutex_unlock(&root->fs_commit_mutex);
218 }
219}
220
221/*
222 * When a transaction is committed, we'll move those inode numbers which
223 * are smaller than root->cache_progress from pinned tree to free_ino tree,
224 * and others will just be dropped, because the commit root we were
225 * searching has changed.
226 *
227 * Must be called with root->fs_commit_mutex held
228 */
229void btrfs_unpin_free_ino(struct btrfs_root *root)
230{
231 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
232 struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset;
233 struct btrfs_free_space *info;
234 struct rb_node *n;
235 u64 count;
236
237 while (1) {
238 n = rb_first(rbroot);
239 if (!n)
240 break;
241
242 info = rb_entry(n, struct btrfs_free_space, offset_index);
243 BUG_ON(info->bitmap);
244
245 if (info->offset > root->cache_progress)
246 goto free;
247 else if (info->offset + info->bytes > root->cache_progress)
248 count = root->cache_progress - info->offset + 1;
249 else
250 count = info->bytes;
251
252 __btrfs_add_free_space(ctl, info->offset, count);
253free:
254 rb_erase(&info->offset_index, rbroot);
255 kfree(info);
256 }
257}
258
259#define INIT_THRESHOLD (((1024 * 32) / 2) / sizeof(struct btrfs_free_space))
260#define INODES_PER_BITMAP (PAGE_CACHE_SIZE * 8)
261
262/*
263 * The goal is to keep the memory used by the free_ino tree won't
264 * exceed the memory if we use bitmaps only.
265 */
266static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
267{
268 struct btrfs_free_space *info;
269 struct rb_node *n;
270 int max_ino;
271 int max_bitmaps;
272
273 n = rb_last(&ctl->free_space_offset);
274 if (!n) {
275 ctl->extents_thresh = INIT_THRESHOLD;
276 return;
277 }
278 info = rb_entry(n, struct btrfs_free_space, offset_index);
279
280 /*
281 * Find the maximum inode number in the filesystem. Note we
282 * ignore the fact that this can be a bitmap, because we are
283 * not doing precise calculation.
284 */
285 max_ino = info->bytes - 1;
286
287 max_bitmaps = ALIGN(max_ino, INODES_PER_BITMAP) / INODES_PER_BITMAP;
288 if (max_bitmaps <= ctl->total_bitmaps) {
289 ctl->extents_thresh = 0;
290 return;
291 }
292
293 ctl->extents_thresh = (max_bitmaps - ctl->total_bitmaps) *
294 PAGE_CACHE_SIZE / sizeof(*info);
295}
296
297/*
298 * We don't fall back to bitmap, if we are below the extents threshold
299 * or this chunk of inode numbers is a big one.
300 */
301static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
302 struct btrfs_free_space *info)
303{
304 if (ctl->free_extents < ctl->extents_thresh ||
305 info->bytes > INODES_PER_BITMAP / 10)
306 return false;
307
308 return true;
309}
310
311static struct btrfs_free_space_op free_ino_op = {
312 .recalc_thresholds = recalculate_thresholds,
313 .use_bitmap = use_bitmap,
314};
315
316static void pinned_recalc_thresholds(struct btrfs_free_space_ctl *ctl)
317{
318}
319
320static bool pinned_use_bitmap(struct btrfs_free_space_ctl *ctl,
321 struct btrfs_free_space *info)
322{
323 /*
324 * We always use extents for two reasons:
325 *
326 * - The pinned tree is only used during the process of caching
327 * work.
328 * - Make code simpler. See btrfs_unpin_free_ino().
329 */
330 return false;
331}
332
333static struct btrfs_free_space_op pinned_free_ino_op = {
334 .recalc_thresholds = pinned_recalc_thresholds,
335 .use_bitmap = pinned_use_bitmap,
336};
337
338void btrfs_init_free_ino_ctl(struct btrfs_root *root)
339{
340 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
341 struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
342
343 spin_lock_init(&ctl->tree_lock);
344 ctl->unit = 1;
345 ctl->start = 0;
346 ctl->private = NULL;
347 ctl->op = &free_ino_op;
348
349 /*
350 * Initially we allow to use 16K of ram to cache chunks of
351 * inode numbers before we resort to bitmaps. This is somewhat
352 * arbitrary, but it will be adjusted in runtime.
353 */
354 ctl->extents_thresh = INIT_THRESHOLD;
355
356 spin_lock_init(&pinned->tree_lock);
357 pinned->unit = 1;
358 pinned->start = 0;
359 pinned->private = NULL;
360 pinned->extents_thresh = 0;
361 pinned->op = &pinned_free_ino_op;
362}
363
364int btrfs_save_ino_cache(struct btrfs_root *root,
365 struct btrfs_trans_handle *trans)
366{
367 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
368 struct btrfs_path *path;
369 struct inode *inode;
370 u64 alloc_hint = 0;
371 int ret;
372 int prealloc;
373 bool retry = false;
374
375 path = btrfs_alloc_path();
376 if (!path)
377 return -ENOMEM;
378again:
379 inode = lookup_free_ino_inode(root, path);
380 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
381 ret = PTR_ERR(inode);
382 goto out;
383 }
384
385 if (IS_ERR(inode)) {
386 BUG_ON(retry);
387 retry = true;
388
389 ret = create_free_ino_inode(root, trans, path);
390 if (ret)
391 goto out;
392 goto again;
393 }
394
395 BTRFS_I(inode)->generation = 0;
396 ret = btrfs_update_inode(trans, root, inode);
397 WARN_ON(ret);
398
399 if (i_size_read(inode) > 0) {
400 ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
401 if (ret)
402 goto out_put;
403 }
404
405 spin_lock(&root->cache_lock);
406 if (root->cached != BTRFS_CACHE_FINISHED) {
407 ret = -1;
408 spin_unlock(&root->cache_lock);
409 goto out_put;
410 }
411 spin_unlock(&root->cache_lock);
412
413 spin_lock(&ctl->tree_lock);
414 prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents;
415 prealloc = ALIGN(prealloc, PAGE_CACHE_SIZE);
416 prealloc += ctl->total_bitmaps * PAGE_CACHE_SIZE;
417 spin_unlock(&ctl->tree_lock);
418
419 /* Just to make sure we have enough space */
420 prealloc += 8 * PAGE_CACHE_SIZE;
421
422 ret = btrfs_check_data_free_space(inode, prealloc);
423 if (ret)
424 goto out_put;
425
426 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
427 prealloc, prealloc, &alloc_hint);
428 if (ret)
429 goto out_put;
430 btrfs_free_reserved_data_space(inode, prealloc);
431
432out_put:
433 iput(inode);
434out:
435 if (ret == 0)
436 ret = btrfs_write_out_ino_cache(root, trans, path);
437
438 btrfs_free_path(path);
439 return ret;
440}
441
442static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
24{ 443{
25 struct btrfs_path *path; 444 struct btrfs_path *path;
26 int ret; 445 int ret;
@@ -55,15 +474,14 @@ error:
55 return ret; 474 return ret;
56} 475}
57 476
58int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, 477int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid)
59 struct btrfs_root *root,
60 u64 dirid, u64 *objectid)
61{ 478{
62 int ret; 479 int ret;
63 mutex_lock(&root->objectid_mutex); 480 mutex_lock(&root->objectid_mutex);
64 481
65 if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) { 482 if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) {
66 ret = btrfs_find_highest_inode(root, &root->highest_objectid); 483 ret = btrfs_find_highest_objectid(root,
484 &root->highest_objectid);
67 if (ret) 485 if (ret)
68 goto out; 486 goto out;
69 } 487 }
diff --git a/fs/btrfs/inode-map.h b/fs/btrfs/inode-map.h
new file mode 100644
index 00000000000..ddb347bfee2
--- /dev/null
+++ b/fs/btrfs/inode-map.h
@@ -0,0 +1,13 @@
1#ifndef __BTRFS_INODE_MAP
2#define __BTRFS_INODE_MAP
3
4void btrfs_init_free_ino_ctl(struct btrfs_root *root);
5void btrfs_unpin_free_ino(struct btrfs_root *root);
6void btrfs_return_ino(struct btrfs_root *root, u64 objectid);
7int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid);
8int btrfs_save_ino_cache(struct btrfs_root *root,
9 struct btrfs_trans_handle *trans);
10
11int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid);
12
13#endif
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1d1017f9155..8ae72c3eedb 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -52,6 +52,7 @@
52#include "compression.h" 52#include "compression.h"
53#include "locking.h" 53#include "locking.h"
54#include "free-space-cache.h" 54#include "free-space-cache.h"
55#include "inode-map.h"
55 56
56struct btrfs_iget_args { 57struct btrfs_iget_args {
57 u64 ino; 58 u64 ino;
@@ -139,7 +140,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
139 path->leave_spinning = 1; 140 path->leave_spinning = 1;
140 btrfs_set_trans_block_group(trans, inode); 141 btrfs_set_trans_block_group(trans, inode);
141 142
142 key.objectid = inode->i_ino; 143 key.objectid = btrfs_ino(inode);
143 key.offset = start; 144 key.offset = start;
144 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); 145 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
145 datasize = btrfs_file_extent_calc_inline_size(cur_size); 146 datasize = btrfs_file_extent_calc_inline_size(cur_size);
@@ -746,6 +747,15 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
746 return alloc_hint; 747 return alloc_hint;
747} 748}
748 749
750static inline bool is_free_space_inode(struct btrfs_root *root,
751 struct inode *inode)
752{
753 if (root == root->fs_info->tree_root ||
754 BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
755 return true;
756 return false;
757}
758
749/* 759/*
750 * when extent_io.c finds a delayed allocation range in the file, 760 * when extent_io.c finds a delayed allocation range in the file,
751 * the call backs end up in this code. The basic idea is to 761 * the call backs end up in this code. The basic idea is to
@@ -778,7 +788,7 @@ static noinline int cow_file_range(struct inode *inode,
778 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 788 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
779 int ret = 0; 789 int ret = 0;
780 790
781 BUG_ON(root == root->fs_info->tree_root); 791 BUG_ON(is_free_space_inode(root, inode));
782 trans = btrfs_join_transaction(root, 1); 792 trans = btrfs_join_transaction(root, 1);
783 BUG_ON(IS_ERR(trans)); 793 BUG_ON(IS_ERR(trans));
784 btrfs_set_trans_block_group(trans, inode); 794 btrfs_set_trans_block_group(trans, inode);
@@ -1050,29 +1060,31 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1050 int type; 1060 int type;
1051 int nocow; 1061 int nocow;
1052 int check_prev = 1; 1062 int check_prev = 1;
1053 bool nolock = false; 1063 bool nolock;
1064 u64 ino = btrfs_ino(inode);
1054 1065
1055 path = btrfs_alloc_path(); 1066 path = btrfs_alloc_path();
1056 BUG_ON(!path); 1067 BUG_ON(!path);
1057 if (root == root->fs_info->tree_root) { 1068
1058 nolock = true; 1069 nolock = is_free_space_inode(root, inode);
1070
1071 if (nolock)
1059 trans = btrfs_join_transaction_nolock(root, 1); 1072 trans = btrfs_join_transaction_nolock(root, 1);
1060 } else { 1073 else
1061 trans = btrfs_join_transaction(root, 1); 1074 trans = btrfs_join_transaction(root, 1);
1062 }
1063 BUG_ON(IS_ERR(trans)); 1075 BUG_ON(IS_ERR(trans));
1064 1076
1065 cow_start = (u64)-1; 1077 cow_start = (u64)-1;
1066 cur_offset = start; 1078 cur_offset = start;
1067 while (1) { 1079 while (1) {
1068 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, 1080 ret = btrfs_lookup_file_extent(trans, root, path, ino,
1069 cur_offset, 0); 1081 cur_offset, 0);
1070 BUG_ON(ret < 0); 1082 BUG_ON(ret < 0);
1071 if (ret > 0 && path->slots[0] > 0 && check_prev) { 1083 if (ret > 0 && path->slots[0] > 0 && check_prev) {
1072 leaf = path->nodes[0]; 1084 leaf = path->nodes[0];
1073 btrfs_item_key_to_cpu(leaf, &found_key, 1085 btrfs_item_key_to_cpu(leaf, &found_key,
1074 path->slots[0] - 1); 1086 path->slots[0] - 1);
1075 if (found_key.objectid == inode->i_ino && 1087 if (found_key.objectid == ino &&
1076 found_key.type == BTRFS_EXTENT_DATA_KEY) 1088 found_key.type == BTRFS_EXTENT_DATA_KEY)
1077 path->slots[0]--; 1089 path->slots[0]--;
1078 } 1090 }
@@ -1093,7 +1105,7 @@ next_slot:
1093 num_bytes = 0; 1105 num_bytes = 0;
1094 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1106 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1095 1107
1096 if (found_key.objectid > inode->i_ino || 1108 if (found_key.objectid > ino ||
1097 found_key.type > BTRFS_EXTENT_DATA_KEY || 1109 found_key.type > BTRFS_EXTENT_DATA_KEY ||
1098 found_key.offset > end) 1110 found_key.offset > end)
1099 break; 1111 break;
@@ -1128,7 +1140,7 @@ next_slot:
1128 goto out_check; 1140 goto out_check;
1129 if (btrfs_extent_readonly(root, disk_bytenr)) 1141 if (btrfs_extent_readonly(root, disk_bytenr))
1130 goto out_check; 1142 goto out_check;
1131 if (btrfs_cross_ref_exist(trans, root, inode->i_ino, 1143 if (btrfs_cross_ref_exist(trans, root, ino,
1132 found_key.offset - 1144 found_key.offset -
1133 extent_offset, disk_bytenr)) 1145 extent_offset, disk_bytenr))
1134 goto out_check; 1146 goto out_check;
@@ -1317,8 +1329,7 @@ static int btrfs_set_bit_hook(struct inode *inode,
1317 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { 1329 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1318 struct btrfs_root *root = BTRFS_I(inode)->root; 1330 struct btrfs_root *root = BTRFS_I(inode)->root;
1319 u64 len = state->end + 1 - state->start; 1331 u64 len = state->end + 1 - state->start;
1320 int do_list = (root->root_key.objectid != 1332 bool do_list = !is_free_space_inode(root, inode);
1321 BTRFS_ROOT_TREE_OBJECTID);
1322 1333
1323 if (*bits & EXTENT_FIRST_DELALLOC) 1334 if (*bits & EXTENT_FIRST_DELALLOC)
1324 *bits &= ~EXTENT_FIRST_DELALLOC; 1335 *bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1351,8 +1362,7 @@ static int btrfs_clear_bit_hook(struct inode *inode,
1351 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { 1362 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1352 struct btrfs_root *root = BTRFS_I(inode)->root; 1363 struct btrfs_root *root = BTRFS_I(inode)->root;
1353 u64 len = state->end + 1 - state->start; 1364 u64 len = state->end + 1 - state->start;
1354 int do_list = (root->root_key.objectid != 1365 bool do_list = !is_free_space_inode(root, inode);
1355 BTRFS_ROOT_TREE_OBJECTID);
1356 1366
1357 if (*bits & EXTENT_FIRST_DELALLOC) 1367 if (*bits & EXTENT_FIRST_DELALLOC)
1358 *bits &= ~EXTENT_FIRST_DELALLOC; 1368 *bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1459,7 +1469,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1459 1469
1460 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 1470 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
1461 1471
1462 if (root == root->fs_info->tree_root) 1472 if (is_free_space_inode(root, inode))
1463 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2); 1473 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
1464 else 1474 else
1465 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 1475 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
@@ -1645,7 +1655,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1645 &hint, 0); 1655 &hint, 0);
1646 BUG_ON(ret); 1656 BUG_ON(ret);
1647 1657
1648 ins.objectid = inode->i_ino; 1658 ins.objectid = btrfs_ino(inode);
1649 ins.offset = file_pos; 1659 ins.offset = file_pos;
1650 ins.type = BTRFS_EXTENT_DATA_KEY; 1660 ins.type = BTRFS_EXTENT_DATA_KEY;
1651 ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi)); 1661 ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi));
@@ -1676,7 +1686,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1676 ins.type = BTRFS_EXTENT_ITEM_KEY; 1686 ins.type = BTRFS_EXTENT_ITEM_KEY;
1677 ret = btrfs_alloc_reserved_file_extent(trans, root, 1687 ret = btrfs_alloc_reserved_file_extent(trans, root,
1678 root->root_key.objectid, 1688 root->root_key.objectid,
1679 inode->i_ino, file_pos, &ins); 1689 btrfs_ino(inode), file_pos, &ins);
1680 BUG_ON(ret); 1690 BUG_ON(ret);
1681 btrfs_free_path(path); 1691 btrfs_free_path(path);
1682 1692
@@ -1702,7 +1712,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1702 struct extent_state *cached_state = NULL; 1712 struct extent_state *cached_state = NULL;
1703 int compress_type = 0; 1713 int compress_type = 0;
1704 int ret; 1714 int ret;
1705 bool nolock = false; 1715 bool nolock;
1706 1716
1707 ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, 1717 ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
1708 end - start + 1); 1718 end - start + 1);
@@ -1710,7 +1720,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1710 return 0; 1720 return 0;
1711 BUG_ON(!ordered_extent); 1721 BUG_ON(!ordered_extent);
1712 1722
1713 nolock = (root == root->fs_info->tree_root); 1723 nolock = is_free_space_inode(root, inode);
1714 1724
1715 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { 1725 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
1716 BUG_ON(!list_empty(&ordered_extent->list)); 1726 BUG_ON(!list_empty(&ordered_extent->list));
@@ -2005,8 +2015,9 @@ good:
2005 return 0; 2015 return 0;
2006 2016
2007zeroit: 2017zeroit:
2008 printk_ratelimited(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u " 2018 printk_ratelimited(KERN_INFO "btrfs csum failed ino %llu off %llu csum %u "
2009 "private %llu\n", page->mapping->host->i_ino, 2019 "private %llu\n",
2020 (unsigned long long)btrfs_ino(page->mapping->host),
2010 (unsigned long long)start, csum, 2021 (unsigned long long)start, csum,
2011 (unsigned long long)private); 2022 (unsigned long long)private);
2012 memset(kaddr + offset, 1, end - start + 1); 2023 memset(kaddr + offset, 1, end - start + 1);
@@ -2243,7 +2254,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2243 2254
2244 /* insert an orphan item to track this unlinked/truncated file */ 2255 /* insert an orphan item to track this unlinked/truncated file */
2245 if (insert >= 1) { 2256 if (insert >= 1) {
2246 ret = btrfs_insert_orphan_item(trans, root, inode->i_ino); 2257 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
2247 BUG_ON(ret); 2258 BUG_ON(ret);
2248 } 2259 }
2249 2260
@@ -2280,7 +2291,7 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
2280 spin_unlock(&root->orphan_lock); 2291 spin_unlock(&root->orphan_lock);
2281 2292
2282 if (trans && delete_item) { 2293 if (trans && delete_item) {
2283 ret = btrfs_del_orphan_item(trans, root, inode->i_ino); 2294 ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
2284 BUG_ON(ret); 2295 BUG_ON(ret);
2285 } 2296 }
2286 2297
@@ -2542,7 +2553,8 @@ static void btrfs_read_locked_inode(struct inode *inode)
2542 * try to precache a NULL acl entry for files that don't have 2553 * try to precache a NULL acl entry for files that don't have
2543 * any xattrs or acls 2554 * any xattrs or acls
2544 */ 2555 */
2545 maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino); 2556 maybe_acls = acls_after_inode_item(leaf, path->slots[0],
2557 btrfs_ino(inode));
2546 if (!maybe_acls) 2558 if (!maybe_acls)
2547 cache_no_acl(inode); 2559 cache_no_acl(inode);
2548 2560
@@ -2646,11 +2658,26 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
2646 struct extent_buffer *leaf; 2658 struct extent_buffer *leaf;
2647 int ret; 2659 int ret;
2648 2660
2661 /*
2662 * If root is tree root, it means this inode is used to
2663 * store free space information. And these inodes are updated
2664 * when committing the transaction, so they needn't delaye to
2665 * be updated, or deadlock will occured.
2666 */
2667 if (!is_free_space_inode(root, inode)) {
2668 ret = btrfs_delayed_update_inode(trans, root, inode);
2669 if (!ret)
2670 btrfs_set_inode_last_trans(trans, inode);
2671 return ret;
2672 }
2673
2649 path = btrfs_alloc_path(); 2674 path = btrfs_alloc_path();
2650 BUG_ON(!path); 2675 if (!path)
2676 return -ENOMEM;
2677
2651 path->leave_spinning = 1; 2678 path->leave_spinning = 1;
2652 ret = btrfs_lookup_inode(trans, root, path, 2679 ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location,
2653 &BTRFS_I(inode)->location, 1); 2680 1);
2654 if (ret) { 2681 if (ret) {
2655 if (ret > 0) 2682 if (ret > 0)
2656 ret = -ENOENT; 2683 ret = -ENOENT;
@@ -2660,7 +2687,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
2660 btrfs_unlock_up_safe(path, 1); 2687 btrfs_unlock_up_safe(path, 1);
2661 leaf = path->nodes[0]; 2688 leaf = path->nodes[0];
2662 inode_item = btrfs_item_ptr(leaf, path->slots[0], 2689 inode_item = btrfs_item_ptr(leaf, path->slots[0],
2663 struct btrfs_inode_item); 2690 struct btrfs_inode_item);
2664 2691
2665 fill_inode_item(trans, leaf, inode_item, inode); 2692 fill_inode_item(trans, leaf, inode_item, inode);
2666 btrfs_mark_buffer_dirty(leaf); 2693 btrfs_mark_buffer_dirty(leaf);
@@ -2671,7 +2698,6 @@ failed:
2671 return ret; 2698 return ret;
2672} 2699}
2673 2700
2674
2675/* 2701/*
2676 * unlink helper that gets used here in inode.c and in the tree logging 2702 * unlink helper that gets used here in inode.c and in the tree logging
2677 * recovery code. It remove a link in a directory with a given name, and 2703 * recovery code. It remove a link in a directory with a given name, and
@@ -2688,6 +2714,8 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2688 struct btrfs_dir_item *di; 2714 struct btrfs_dir_item *di;
2689 struct btrfs_key key; 2715 struct btrfs_key key;
2690 u64 index; 2716 u64 index;
2717 u64 ino = btrfs_ino(inode);
2718 u64 dir_ino = btrfs_ino(dir);
2691 2719
2692 path = btrfs_alloc_path(); 2720 path = btrfs_alloc_path();
2693 if (!path) { 2721 if (!path) {
@@ -2696,7 +2724,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2696 } 2724 }
2697 2725
2698 path->leave_spinning = 1; 2726 path->leave_spinning = 1;
2699 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, 2727 di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
2700 name, name_len, -1); 2728 name, name_len, -1);
2701 if (IS_ERR(di)) { 2729 if (IS_ERR(di)) {
2702 ret = PTR_ERR(di); 2730 ret = PTR_ERR(di);
@@ -2713,31 +2741,21 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2713 goto err; 2741 goto err;
2714 btrfs_release_path(path); 2742 btrfs_release_path(path);
2715 2743
2716 ret = btrfs_del_inode_ref(trans, root, name, name_len, 2744 ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
2717 inode->i_ino, 2745 dir_ino, &index);
2718 dir->i_ino, &index);
2719 if (ret) { 2746 if (ret) {
2720 printk(KERN_INFO "btrfs failed to delete reference to %.*s, " 2747 printk(KERN_INFO "btrfs failed to delete reference to %.*s, "
2721 "inode %lu parent %lu\n", name_len, name, 2748 "inode %llu parent %llu\n", name_len, name,
2722 inode->i_ino, dir->i_ino); 2749 (unsigned long long)ino, (unsigned long long)dir_ino);
2723 goto err; 2750 goto err;
2724 } 2751 }
2725 2752
2726 di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, 2753 ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
2727 index, name, name_len, -1); 2754 if (ret)
2728 if (IS_ERR(di)) {
2729 ret = PTR_ERR(di);
2730 goto err;
2731 }
2732 if (!di) {
2733 ret = -ENOENT;
2734 goto err; 2755 goto err;
2735 }
2736 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2737 btrfs_release_path(path);
2738 2756
2739 ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, 2757 ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
2740 inode, dir->i_ino); 2758 inode, dir_ino);
2741 BUG_ON(ret != 0 && ret != -ENOENT); 2759 BUG_ON(ret != 0 && ret != -ENOENT);
2742 2760
2743 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, 2761 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
@@ -2815,12 +2833,14 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2815 int check_link = 1; 2833 int check_link = 1;
2816 int err = -ENOSPC; 2834 int err = -ENOSPC;
2817 int ret; 2835 int ret;
2836 u64 ino = btrfs_ino(inode);
2837 u64 dir_ino = btrfs_ino(dir);
2818 2838
2819 trans = btrfs_start_transaction(root, 10); 2839 trans = btrfs_start_transaction(root, 10);
2820 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) 2840 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
2821 return trans; 2841 return trans;
2822 2842
2823 if (inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) 2843 if (ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
2824 return ERR_PTR(-ENOSPC); 2844 return ERR_PTR(-ENOSPC);
2825 2845
2826 /* check if there is someone else holds reference */ 2846 /* check if there is someone else holds reference */
@@ -2879,7 +2899,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2879 2899
2880 if (ret == 0 && S_ISREG(inode->i_mode)) { 2900 if (ret == 0 && S_ISREG(inode->i_mode)) {
2881 ret = btrfs_lookup_file_extent(trans, root, path, 2901 ret = btrfs_lookup_file_extent(trans, root, path,
2882 inode->i_ino, (u64)-1, 0); 2902 ino, (u64)-1, 0);
2883 if (ret < 0) { 2903 if (ret < 0) {
2884 err = ret; 2904 err = ret;
2885 goto out; 2905 goto out;
@@ -2895,7 +2915,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2895 goto out; 2915 goto out;
2896 } 2916 }
2897 2917
2898 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, 2918 di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
2899 dentry->d_name.name, dentry->d_name.len, 0); 2919 dentry->d_name.name, dentry->d_name.len, 0);
2900 if (IS_ERR(di)) { 2920 if (IS_ERR(di)) {
2901 err = PTR_ERR(di); 2921 err = PTR_ERR(di);
@@ -2912,7 +2932,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2912 2932
2913 ref = btrfs_lookup_inode_ref(trans, root, path, 2933 ref = btrfs_lookup_inode_ref(trans, root, path,
2914 dentry->d_name.name, dentry->d_name.len, 2934 dentry->d_name.name, dentry->d_name.len,
2915 inode->i_ino, dir->i_ino, 0); 2935 ino, dir_ino, 0);
2916 if (IS_ERR(ref)) { 2936 if (IS_ERR(ref)) {
2917 err = PTR_ERR(ref); 2937 err = PTR_ERR(ref);
2918 goto out; 2938 goto out;
@@ -2923,7 +2943,15 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2923 index = btrfs_inode_ref_index(path->nodes[0], ref); 2943 index = btrfs_inode_ref_index(path->nodes[0], ref);
2924 btrfs_release_path(path); 2944 btrfs_release_path(path);
2925 2945
2926 di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, index, 2946 /*
2947 * This is a commit root search, if we can lookup inode item and other
2948 * relative items in the commit root, it means the transaction of
2949 * dir/file creation has been committed, and the dir index item that we
2950 * delay to insert has also been inserted into the commit root. So
2951 * we needn't worry about the delayed insertion of the dir index item
2952 * here.
2953 */
2954 di = btrfs_lookup_dir_index_item(trans, root, path, dir_ino, index,
2927 dentry->d_name.name, dentry->d_name.len, 0); 2955 dentry->d_name.name, dentry->d_name.len, 0);
2928 if (IS_ERR(di)) { 2956 if (IS_ERR(di)) {
2929 err = PTR_ERR(di); 2957 err = PTR_ERR(di);
@@ -2998,12 +3026,13 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
2998 struct btrfs_key key; 3026 struct btrfs_key key;
2999 u64 index; 3027 u64 index;
3000 int ret; 3028 int ret;
3029 u64 dir_ino = btrfs_ino(dir);
3001 3030
3002 path = btrfs_alloc_path(); 3031 path = btrfs_alloc_path();
3003 if (!path) 3032 if (!path)
3004 return -ENOMEM; 3033 return -ENOMEM;
3005 3034
3006 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, 3035 di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
3007 name, name_len, -1); 3036 name, name_len, -1);
3008 BUG_ON(IS_ERR_OR_NULL(di)); 3037 BUG_ON(IS_ERR_OR_NULL(di));
3009 3038
@@ -3016,10 +3045,10 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
3016 3045
3017 ret = btrfs_del_root_ref(trans, root->fs_info->tree_root, 3046 ret = btrfs_del_root_ref(trans, root->fs_info->tree_root,
3018 objectid, root->root_key.objectid, 3047 objectid, root->root_key.objectid,
3019 dir->i_ino, &index, name, name_len); 3048 dir_ino, &index, name, name_len);
3020 if (ret < 0) { 3049 if (ret < 0) {
3021 BUG_ON(ret != -ENOENT); 3050 BUG_ON(ret != -ENOENT);
3022 di = btrfs_search_dir_index_item(root, path, dir->i_ino, 3051 di = btrfs_search_dir_index_item(root, path, dir_ino,
3023 name, name_len); 3052 name, name_len);
3024 BUG_ON(IS_ERR_OR_NULL(di)); 3053 BUG_ON(IS_ERR_OR_NULL(di));
3025 3054
@@ -3028,24 +3057,16 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
3028 btrfs_release_path(path); 3057 btrfs_release_path(path);
3029 index = key.offset; 3058 index = key.offset;
3030 } 3059 }
3060 btrfs_release_path(path);
3031 3061
3032 di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, 3062 ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
3033 index, name, name_len, -1);
3034 BUG_ON(IS_ERR_OR_NULL(di));
3035
3036 leaf = path->nodes[0];
3037 btrfs_dir_item_key_to_cpu(leaf, di, &key);
3038 WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
3039 ret = btrfs_delete_one_dir_name(trans, root, path, di);
3040 BUG_ON(ret); 3063 BUG_ON(ret);
3041 btrfs_release_path(path);
3042 3064
3043 btrfs_i_size_write(dir, dir->i_size - name_len * 2); 3065 btrfs_i_size_write(dir, dir->i_size - name_len * 2);
3044 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 3066 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
3045 ret = btrfs_update_inode(trans, root, dir); 3067 ret = btrfs_update_inode(trans, root, dir);
3046 BUG_ON(ret); 3068 BUG_ON(ret);
3047 3069
3048 btrfs_free_path(path);
3049 return 0; 3070 return 0;
3050} 3071}
3051 3072
@@ -3058,7 +3079,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
3058 unsigned long nr = 0; 3079 unsigned long nr = 0;
3059 3080
3060 if (inode->i_size > BTRFS_EMPTY_DIR_SIZE || 3081 if (inode->i_size > BTRFS_EMPTY_DIR_SIZE ||
3061 inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 3082 btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID)
3062 return -ENOTEMPTY; 3083 return -ENOTEMPTY;
3063 3084
3064 trans = __unlink_start_trans(dir, dentry); 3085 trans = __unlink_start_trans(dir, dentry);
@@ -3067,7 +3088,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
3067 3088
3068 btrfs_set_trans_block_group(trans, dir); 3089 btrfs_set_trans_block_group(trans, dir);
3069 3090
3070 if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { 3091 if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
3071 err = btrfs_unlink_subvol(trans, root, dir, 3092 err = btrfs_unlink_subvol(trans, root, dir,
3072 BTRFS_I(inode)->location.objectid, 3093 BTRFS_I(inode)->location.objectid,
3073 dentry->d_name.name, 3094 dentry->d_name.name,
@@ -3127,17 +3148,27 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
3127 int encoding; 3148 int encoding;
3128 int ret; 3149 int ret;
3129 int err = 0; 3150 int err = 0;
3151 u64 ino = btrfs_ino(inode);
3130 3152
3131 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); 3153 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
3132 3154
3133 if (root->ref_cows || root == root->fs_info->tree_root) 3155 if (root->ref_cows || root == root->fs_info->tree_root)
3134 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 3156 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
3135 3157
3158 /*
3159 * This function is also used to drop the items in the log tree before
3160 * we relog the inode, so if root != BTRFS_I(inode)->root, it means
3161 * it is used to drop the loged items. So we shouldn't kill the delayed
3162 * items.
3163 */
3164 if (min_type == 0 && root == BTRFS_I(inode)->root)
3165 btrfs_kill_delayed_inode_items(inode);
3166
3136 path = btrfs_alloc_path(); 3167 path = btrfs_alloc_path();
3137 BUG_ON(!path); 3168 BUG_ON(!path);
3138 path->reada = -1; 3169 path->reada = -1;
3139 3170
3140 key.objectid = inode->i_ino; 3171 key.objectid = ino;
3141 key.offset = (u64)-1; 3172 key.offset = (u64)-1;
3142 key.type = (u8)-1; 3173 key.type = (u8)-1;
3143 3174
@@ -3165,7 +3196,7 @@ search_again:
3165 found_type = btrfs_key_type(&found_key); 3196 found_type = btrfs_key_type(&found_key);
3166 encoding = 0; 3197 encoding = 0;
3167 3198
3168 if (found_key.objectid != inode->i_ino) 3199 if (found_key.objectid != ino)
3169 break; 3200 break;
3170 3201
3171 if (found_type < min_type) 3202 if (found_type < min_type)
@@ -3284,7 +3315,7 @@ delete:
3284 ret = btrfs_free_extent(trans, root, extent_start, 3315 ret = btrfs_free_extent(trans, root, extent_start,
3285 extent_num_bytes, 0, 3316 extent_num_bytes, 0,
3286 btrfs_header_owner(leaf), 3317 btrfs_header_owner(leaf),
3287 inode->i_ino, extent_offset); 3318 ino, extent_offset);
3288 BUG_ON(ret); 3319 BUG_ON(ret);
3289 } 3320 }
3290 3321
@@ -3293,7 +3324,9 @@ delete:
3293 3324
3294 if (path->slots[0] == 0 || 3325 if (path->slots[0] == 0 ||
3295 path->slots[0] != pending_del_slot) { 3326 path->slots[0] != pending_del_slot) {
3296 if (root->ref_cows) { 3327 if (root->ref_cows &&
3328 BTRFS_I(inode)->location.objectid !=
3329 BTRFS_FREE_INO_OBJECTID) {
3297 err = -EAGAIN; 3330 err = -EAGAIN;
3298 goto out; 3331 goto out;
3299 } 3332 }
@@ -3483,7 +3516,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3483 break; 3516 break;
3484 3517
3485 err = btrfs_insert_file_extent(trans, root, 3518 err = btrfs_insert_file_extent(trans, root,
3486 inode->i_ino, cur_offset, 0, 3519 btrfs_ino(inode), cur_offset, 0,
3487 0, hole_size, 0, hole_size, 3520 0, hole_size, 0, hole_size,
3488 0, 0, 0); 3521 0, 0, 0);
3489 if (err) 3522 if (err)
@@ -3585,7 +3618,7 @@ void btrfs_evict_inode(struct inode *inode)
3585 3618
3586 truncate_inode_pages(&inode->i_data, 0); 3619 truncate_inode_pages(&inode->i_data, 0);
3587 if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || 3620 if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
3588 root == root->fs_info->tree_root)) 3621 is_free_space_inode(root, inode)))
3589 goto no_delete; 3622 goto no_delete;
3590 3623
3591 if (is_bad_inode(inode)) { 3624 if (is_bad_inode(inode)) {
@@ -3638,6 +3671,10 @@ void btrfs_evict_inode(struct inode *inode)
3638 BUG_ON(ret); 3671 BUG_ON(ret);
3639 } 3672 }
3640 3673
3674 if (!(root == root->fs_info->tree_root ||
3675 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
3676 btrfs_return_ino(root, btrfs_ino(inode));
3677
3641 nr = trans->blocks_used; 3678 nr = trans->blocks_used;
3642 btrfs_end_transaction(trans, root); 3679 btrfs_end_transaction(trans, root);
3643 btrfs_btree_balance_dirty(root, nr); 3680 btrfs_btree_balance_dirty(root, nr);
@@ -3663,7 +3700,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
3663 path = btrfs_alloc_path(); 3700 path = btrfs_alloc_path();
3664 BUG_ON(!path); 3701 BUG_ON(!path);
3665 3702
3666 di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name, 3703 di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name,
3667 namelen, 0); 3704 namelen, 0);
3668 if (IS_ERR(di)) 3705 if (IS_ERR(di))
3669 ret = PTR_ERR(di); 3706 ret = PTR_ERR(di);
@@ -3716,7 +3753,7 @@ static int fixup_tree_root_location(struct btrfs_root *root,
3716 3753
3717 leaf = path->nodes[0]; 3754 leaf = path->nodes[0];
3718 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); 3755 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
3719 if (btrfs_root_ref_dirid(leaf, ref) != dir->i_ino || 3756 if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(dir) ||
3720 btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len) 3757 btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
3721 goto out; 3758 goto out;
3722 3759
@@ -3755,6 +3792,7 @@ static void inode_tree_add(struct inode *inode)
3755 struct btrfs_inode *entry; 3792 struct btrfs_inode *entry;
3756 struct rb_node **p; 3793 struct rb_node **p;
3757 struct rb_node *parent; 3794 struct rb_node *parent;
3795 u64 ino = btrfs_ino(inode);
3758again: 3796again:
3759 p = &root->inode_tree.rb_node; 3797 p = &root->inode_tree.rb_node;
3760 parent = NULL; 3798 parent = NULL;
@@ -3767,9 +3805,9 @@ again:
3767 parent = *p; 3805 parent = *p;
3768 entry = rb_entry(parent, struct btrfs_inode, rb_node); 3806 entry = rb_entry(parent, struct btrfs_inode, rb_node);
3769 3807
3770 if (inode->i_ino < entry->vfs_inode.i_ino) 3808 if (ino < btrfs_ino(&entry->vfs_inode))
3771 p = &parent->rb_left; 3809 p = &parent->rb_left;
3772 else if (inode->i_ino > entry->vfs_inode.i_ino) 3810 else if (ino > btrfs_ino(&entry->vfs_inode))
3773 p = &parent->rb_right; 3811 p = &parent->rb_right;
3774 else { 3812 else {
3775 WARN_ON(!(entry->vfs_inode.i_state & 3813 WARN_ON(!(entry->vfs_inode.i_state &
@@ -3833,9 +3871,9 @@ again:
3833 prev = node; 3871 prev = node;
3834 entry = rb_entry(node, struct btrfs_inode, rb_node); 3872 entry = rb_entry(node, struct btrfs_inode, rb_node);
3835 3873
3836 if (objectid < entry->vfs_inode.i_ino) 3874 if (objectid < btrfs_ino(&entry->vfs_inode))
3837 node = node->rb_left; 3875 node = node->rb_left;
3838 else if (objectid > entry->vfs_inode.i_ino) 3876 else if (objectid > btrfs_ino(&entry->vfs_inode))
3839 node = node->rb_right; 3877 node = node->rb_right;
3840 else 3878 else
3841 break; 3879 break;
@@ -3843,7 +3881,7 @@ again:
3843 if (!node) { 3881 if (!node) {
3844 while (prev) { 3882 while (prev) {
3845 entry = rb_entry(prev, struct btrfs_inode, rb_node); 3883 entry = rb_entry(prev, struct btrfs_inode, rb_node);
3846 if (objectid <= entry->vfs_inode.i_ino) { 3884 if (objectid <= btrfs_ino(&entry->vfs_inode)) {
3847 node = prev; 3885 node = prev;
3848 break; 3886 break;
3849 } 3887 }
@@ -3852,7 +3890,7 @@ again:
3852 } 3890 }
3853 while (node) { 3891 while (node) {
3854 entry = rb_entry(node, struct btrfs_inode, rb_node); 3892 entry = rb_entry(node, struct btrfs_inode, rb_node);
3855 objectid = entry->vfs_inode.i_ino + 1; 3893 objectid = btrfs_ino(&entry->vfs_inode) + 1;
3856 inode = igrab(&entry->vfs_inode); 3894 inode = igrab(&entry->vfs_inode);
3857 if (inode) { 3895 if (inode) {
3858 spin_unlock(&root->inode_lock); 3896 spin_unlock(&root->inode_lock);
@@ -3890,7 +3928,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
3890static int btrfs_find_actor(struct inode *inode, void *opaque) 3928static int btrfs_find_actor(struct inode *inode, void *opaque)
3891{ 3929{
3892 struct btrfs_iget_args *args = opaque; 3930 struct btrfs_iget_args *args = opaque;
3893 return args->ino == inode->i_ino && 3931 return args->ino == btrfs_ino(inode) &&
3894 args->root == BTRFS_I(inode)->root; 3932 args->root == BTRFS_I(inode)->root;
3895} 3933}
3896 3934
@@ -4035,7 +4073,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
4035 return d_splice_alias(inode, dentry); 4073 return d_splice_alias(inode, dentry);
4036} 4074}
4037 4075
4038static unsigned char btrfs_filetype_table[] = { 4076unsigned char btrfs_filetype_table[] = {
4039 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 4077 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
4040}; 4078};
4041 4079
@@ -4049,6 +4087,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4049 struct btrfs_key key; 4087 struct btrfs_key key;
4050 struct btrfs_key found_key; 4088 struct btrfs_key found_key;
4051 struct btrfs_path *path; 4089 struct btrfs_path *path;
4090 struct list_head ins_list;
4091 struct list_head del_list;
4052 int ret; 4092 int ret;
4053 struct extent_buffer *leaf; 4093 struct extent_buffer *leaf;
4054 int slot; 4094 int slot;
@@ -4061,6 +4101,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4061 char tmp_name[32]; 4101 char tmp_name[32];
4062 char *name_ptr; 4102 char *name_ptr;
4063 int name_len; 4103 int name_len;
4104 int is_curr = 0; /* filp->f_pos points to the current index? */
4064 4105
4065 /* FIXME, use a real flag for deciding about the key type */ 4106 /* FIXME, use a real flag for deciding about the key type */
4066 if (root->fs_info->tree_root == root) 4107 if (root->fs_info->tree_root == root)
@@ -4068,9 +4109,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4068 4109
4069 /* special case for "." */ 4110 /* special case for "." */
4070 if (filp->f_pos == 0) { 4111 if (filp->f_pos == 0) {
4071 over = filldir(dirent, ".", 1, 4112 over = filldir(dirent, ".", 1, 1, btrfs_ino(inode), DT_DIR);
4072 1, inode->i_ino,
4073 DT_DIR);
4074 if (over) 4113 if (over)
4075 return 0; 4114 return 0;
4076 filp->f_pos = 1; 4115 filp->f_pos = 1;
@@ -4085,11 +4124,19 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4085 filp->f_pos = 2; 4124 filp->f_pos = 2;
4086 } 4125 }
4087 path = btrfs_alloc_path(); 4126 path = btrfs_alloc_path();
4127 if (!path)
4128 return -ENOMEM;
4088 path->reada = 2; 4129 path->reada = 2;
4089 4130
4131 if (key_type == BTRFS_DIR_INDEX_KEY) {
4132 INIT_LIST_HEAD(&ins_list);
4133 INIT_LIST_HEAD(&del_list);
4134 btrfs_get_delayed_items(inode, &ins_list, &del_list);
4135 }
4136
4090 btrfs_set_key_type(&key, key_type); 4137 btrfs_set_key_type(&key, key_type);
4091 key.offset = filp->f_pos; 4138 key.offset = filp->f_pos;
4092 key.objectid = inode->i_ino; 4139 key.objectid = btrfs_ino(inode);
4093 4140
4094 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4141 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4095 if (ret < 0) 4142 if (ret < 0)
@@ -4116,8 +4163,13 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4116 break; 4163 break;
4117 if (found_key.offset < filp->f_pos) 4164 if (found_key.offset < filp->f_pos)
4118 goto next; 4165 goto next;
4166 if (key_type == BTRFS_DIR_INDEX_KEY &&
4167 btrfs_should_delete_dir_index(&del_list,
4168 found_key.offset))
4169 goto next;
4119 4170
4120 filp->f_pos = found_key.offset; 4171 filp->f_pos = found_key.offset;
4172 is_curr = 1;
4121 4173
4122 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); 4174 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
4123 di_cur = 0; 4175 di_cur = 0;
@@ -4172,6 +4224,15 @@ next:
4172 path->slots[0]++; 4224 path->slots[0]++;
4173 } 4225 }
4174 4226
4227 if (key_type == BTRFS_DIR_INDEX_KEY) {
4228 if (is_curr)
4229 filp->f_pos++;
4230 ret = btrfs_readdir_delayed_dir_index(filp, dirent, filldir,
4231 &ins_list);
4232 if (ret)
4233 goto nopos;
4234 }
4235
4175 /* Reached end of directory/root. Bump pos past the last item. */ 4236 /* Reached end of directory/root. Bump pos past the last item. */
4176 if (key_type == BTRFS_DIR_INDEX_KEY) 4237 if (key_type == BTRFS_DIR_INDEX_KEY)
4177 /* 4238 /*
@@ -4184,6 +4245,8 @@ next:
4184nopos: 4245nopos:
4185 ret = 0; 4246 ret = 0;
4186err: 4247err:
4248 if (key_type == BTRFS_DIR_INDEX_KEY)
4249 btrfs_put_delayed_items(&ins_list, &del_list);
4187 btrfs_free_path(path); 4250 btrfs_free_path(path);
4188 return ret; 4251 return ret;
4189} 4252}
@@ -4199,7 +4262,8 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
4199 return 0; 4262 return 0;
4200 4263
4201 smp_mb(); 4264 smp_mb();
4202 nolock = (root->fs_info->closing && root == root->fs_info->tree_root); 4265 if (root->fs_info->closing && is_free_space_inode(root, inode))
4266 nolock = true;
4203 4267
4204 if (wbc->sync_mode == WB_SYNC_ALL) { 4268 if (wbc->sync_mode == WB_SYNC_ALL) {
4205 if (nolock) 4269 if (nolock)
@@ -4243,8 +4307,9 @@ void btrfs_dirty_inode(struct inode *inode)
4243 trans = btrfs_start_transaction(root, 1); 4307 trans = btrfs_start_transaction(root, 1);
4244 if (IS_ERR(trans)) { 4308 if (IS_ERR(trans)) {
4245 printk_ratelimited(KERN_ERR "btrfs: fail to " 4309 printk_ratelimited(KERN_ERR "btrfs: fail to "
4246 "dirty inode %lu error %ld\n", 4310 "dirty inode %llu error %ld\n",
4247 inode->i_ino, PTR_ERR(trans)); 4311 (unsigned long long)btrfs_ino(inode),
4312 PTR_ERR(trans));
4248 return; 4313 return;
4249 } 4314 }
4250 btrfs_set_trans_block_group(trans, inode); 4315 btrfs_set_trans_block_group(trans, inode);
@@ -4252,11 +4317,14 @@ void btrfs_dirty_inode(struct inode *inode)
4252 ret = btrfs_update_inode(trans, root, inode); 4317 ret = btrfs_update_inode(trans, root, inode);
4253 if (ret) { 4318 if (ret) {
4254 printk_ratelimited(KERN_ERR "btrfs: fail to " 4319 printk_ratelimited(KERN_ERR "btrfs: fail to "
4255 "dirty inode %lu error %d\n", 4320 "dirty inode %llu error %d\n",
4256 inode->i_ino, ret); 4321 (unsigned long long)btrfs_ino(inode),
4322 ret);
4257 } 4323 }
4258 } 4324 }
4259 btrfs_end_transaction(trans, root); 4325 btrfs_end_transaction(trans, root);
4326 if (BTRFS_I(inode)->delayed_node)
4327 btrfs_balance_delayed_items(root);
4260} 4328}
4261 4329
4262/* 4330/*
@@ -4272,7 +4340,7 @@ static int btrfs_set_inode_index_count(struct inode *inode)
4272 struct extent_buffer *leaf; 4340 struct extent_buffer *leaf;
4273 int ret; 4341 int ret;
4274 4342
4275 key.objectid = inode->i_ino; 4343 key.objectid = btrfs_ino(inode);
4276 btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); 4344 btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
4277 key.offset = (u64)-1; 4345 key.offset = (u64)-1;
4278 4346
@@ -4304,7 +4372,7 @@ static int btrfs_set_inode_index_count(struct inode *inode)
4304 leaf = path->nodes[0]; 4372 leaf = path->nodes[0];
4305 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 4373 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4306 4374
4307 if (found_key.objectid != inode->i_ino || 4375 if (found_key.objectid != btrfs_ino(inode) ||
4308 btrfs_key_type(&found_key) != BTRFS_DIR_INDEX_KEY) { 4376 btrfs_key_type(&found_key) != BTRFS_DIR_INDEX_KEY) {
4309 BTRFS_I(inode)->index_cnt = 2; 4377 BTRFS_I(inode)->index_cnt = 2;
4310 goto out; 4378 goto out;
@@ -4325,9 +4393,12 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index)
4325 int ret = 0; 4393 int ret = 0;
4326 4394
4327 if (BTRFS_I(dir)->index_cnt == (u64)-1) { 4395 if (BTRFS_I(dir)->index_cnt == (u64)-1) {
4328 ret = btrfs_set_inode_index_count(dir); 4396 ret = btrfs_inode_delayed_dir_index_count(dir);
4329 if (ret) 4397 if (ret) {
4330 return ret; 4398 ret = btrfs_set_inode_index_count(dir);
4399 if (ret)
4400 return ret;
4401 }
4331 } 4402 }
4332 4403
4333 *index = BTRFS_I(dir)->index_cnt; 4404 *index = BTRFS_I(dir)->index_cnt;
@@ -4363,6 +4434,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4363 return ERR_PTR(-ENOMEM); 4434 return ERR_PTR(-ENOMEM);
4364 } 4435 }
4365 4436
4437 /*
4438 * we have to initialize this early, so we can reclaim the inode
4439 * number if we fail afterwards in this function.
4440 */
4441 inode->i_ino = objectid;
4442
4366 if (dir) { 4443 if (dir) {
4367 trace_btrfs_inode_request(dir); 4444 trace_btrfs_inode_request(dir);
4368 4445
@@ -4408,7 +4485,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4408 goto fail; 4485 goto fail;
4409 4486
4410 inode_init_owner(inode, dir, mode); 4487 inode_init_owner(inode, dir, mode);
4411 inode->i_ino = objectid;
4412 inode_set_bytes(inode, 0); 4488 inode_set_bytes(inode, 0);
4413 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 4489 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
4414 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], 4490 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
@@ -4472,29 +4548,29 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
4472 int ret = 0; 4548 int ret = 0;
4473 struct btrfs_key key; 4549 struct btrfs_key key;
4474 struct btrfs_root *root = BTRFS_I(parent_inode)->root; 4550 struct btrfs_root *root = BTRFS_I(parent_inode)->root;
4551 u64 ino = btrfs_ino(inode);
4552 u64 parent_ino = btrfs_ino(parent_inode);
4475 4553
4476 if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { 4554 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
4477 memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key)); 4555 memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key));
4478 } else { 4556 } else {
4479 key.objectid = inode->i_ino; 4557 key.objectid = ino;
4480 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); 4558 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
4481 key.offset = 0; 4559 key.offset = 0;
4482 } 4560 }
4483 4561
4484 if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { 4562 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
4485 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, 4563 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
4486 key.objectid, root->root_key.objectid, 4564 key.objectid, root->root_key.objectid,
4487 parent_inode->i_ino, 4565 parent_ino, index, name, name_len);
4488 index, name, name_len);
4489 } else if (add_backref) { 4566 } else if (add_backref) {
4490 ret = btrfs_insert_inode_ref(trans, root, 4567 ret = btrfs_insert_inode_ref(trans, root, name, name_len, ino,
4491 name, name_len, inode->i_ino, 4568 parent_ino, index);
4492 parent_inode->i_ino, index);
4493 } 4569 }
4494 4570
4495 if (ret == 0) { 4571 if (ret == 0) {
4496 ret = btrfs_insert_dir_item(trans, root, name, name_len, 4572 ret = btrfs_insert_dir_item(trans, root, name, name_len,
4497 parent_inode->i_ino, &key, 4573 parent_inode, &key,
4498 btrfs_inode_type(inode), index); 4574 btrfs_inode_type(inode), index);
4499 BUG_ON(ret); 4575 BUG_ON(ret);
4500 4576
@@ -4537,10 +4613,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4537 if (!new_valid_dev(rdev)) 4613 if (!new_valid_dev(rdev))
4538 return -EINVAL; 4614 return -EINVAL;
4539 4615
4540 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
4541 if (err)
4542 return err;
4543
4544 /* 4616 /*
4545 * 2 for inode item and ref 4617 * 2 for inode item and ref
4546 * 2 for dir items 4618 * 2 for dir items
@@ -4552,8 +4624,12 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4552 4624
4553 btrfs_set_trans_block_group(trans, dir); 4625 btrfs_set_trans_block_group(trans, dir);
4554 4626
4627 err = btrfs_find_free_ino(root, &objectid);
4628 if (err)
4629 goto out_unlock;
4630
4555 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4631 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4556 dentry->d_name.len, dir->i_ino, objectid, 4632 dentry->d_name.len, btrfs_ino(dir), objectid,
4557 BTRFS_I(dir)->block_group, mode, &index); 4633 BTRFS_I(dir)->block_group, mode, &index);
4558 if (IS_ERR(inode)) { 4634 if (IS_ERR(inode)) {
4559 err = PTR_ERR(inode); 4635 err = PTR_ERR(inode);
@@ -4600,9 +4676,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4600 u64 objectid; 4676 u64 objectid;
4601 u64 index = 0; 4677 u64 index = 0;
4602 4678
4603 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
4604 if (err)
4605 return err;
4606 /* 4679 /*
4607 * 2 for inode item and ref 4680 * 2 for inode item and ref
4608 * 2 for dir items 4681 * 2 for dir items
@@ -4614,8 +4687,12 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4614 4687
4615 btrfs_set_trans_block_group(trans, dir); 4688 btrfs_set_trans_block_group(trans, dir);
4616 4689
4690 err = btrfs_find_free_ino(root, &objectid);
4691 if (err)
4692 goto out_unlock;
4693
4617 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4694 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4618 dentry->d_name.len, dir->i_ino, objectid, 4695 dentry->d_name.len, btrfs_ino(dir), objectid,
4619 BTRFS_I(dir)->block_group, mode, &index); 4696 BTRFS_I(dir)->block_group, mode, &index);
4620 if (IS_ERR(inode)) { 4697 if (IS_ERR(inode)) {
4621 err = PTR_ERR(inode); 4698 err = PTR_ERR(inode);
@@ -4726,10 +4803,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4726 u64 index = 0; 4803 u64 index = 0;
4727 unsigned long nr = 1; 4804 unsigned long nr = 1;
4728 4805
4729 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
4730 if (err)
4731 return err;
4732
4733 /* 4806 /*
4734 * 2 items for inode and ref 4807 * 2 items for inode and ref
4735 * 2 items for dir items 4808 * 2 items for dir items
@@ -4740,8 +4813,12 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4740 return PTR_ERR(trans); 4813 return PTR_ERR(trans);
4741 btrfs_set_trans_block_group(trans, dir); 4814 btrfs_set_trans_block_group(trans, dir);
4742 4815
4816 err = btrfs_find_free_ino(root, &objectid);
4817 if (err)
4818 goto out_fail;
4819
4743 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4820 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4744 dentry->d_name.len, dir->i_ino, objectid, 4821 dentry->d_name.len, btrfs_ino(dir), objectid,
4745 BTRFS_I(dir)->block_group, S_IFDIR | mode, 4822 BTRFS_I(dir)->block_group, S_IFDIR | mode,
4746 &index); 4823 &index);
4747 if (IS_ERR(inode)) { 4824 if (IS_ERR(inode)) {
@@ -4864,7 +4941,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
4864 u64 bytenr; 4941 u64 bytenr;
4865 u64 extent_start = 0; 4942 u64 extent_start = 0;
4866 u64 extent_end = 0; 4943 u64 extent_end = 0;
4867 u64 objectid = inode->i_ino; 4944 u64 objectid = btrfs_ino(inode);
4868 u32 found_type; 4945 u32 found_type;
4869 struct btrfs_path *path = NULL; 4946 struct btrfs_path *path = NULL;
4870 struct btrfs_root *root = BTRFS_I(inode)->root; 4947 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -5372,7 +5449,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
5372 if (!path) 5449 if (!path)
5373 return -ENOMEM; 5450 return -ENOMEM;
5374 5451
5375 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, 5452 ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode),
5376 offset, 0); 5453 offset, 0);
5377 if (ret < 0) 5454 if (ret < 0)
5378 goto out; 5455 goto out;
@@ -5389,7 +5466,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
5389 ret = 0; 5466 ret = 0;
5390 leaf = path->nodes[0]; 5467 leaf = path->nodes[0];
5391 btrfs_item_key_to_cpu(leaf, &key, slot); 5468 btrfs_item_key_to_cpu(leaf, &key, slot);
5392 if (key.objectid != inode->i_ino || 5469 if (key.objectid != btrfs_ino(inode) ||
5393 key.type != BTRFS_EXTENT_DATA_KEY) { 5470 key.type != BTRFS_EXTENT_DATA_KEY) {
5394 /* not our file or wrong item type, must cow */ 5471 /* not our file or wrong item type, must cow */
5395 goto out; 5472 goto out;
@@ -5423,7 +5500,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
5423 * look for other files referencing this extent, if we 5500 * look for other files referencing this extent, if we
5424 * find any we must cow 5501 * find any we must cow
5425 */ 5502 */
5426 if (btrfs_cross_ref_exist(trans, root, inode->i_ino, 5503 if (btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
5427 key.offset - backref_offset, disk_bytenr)) 5504 key.offset - backref_offset, disk_bytenr))
5428 goto out; 5505 goto out;
5429 5506
@@ -5613,9 +5690,10 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
5613 5690
5614 flush_dcache_page(bvec->bv_page); 5691 flush_dcache_page(bvec->bv_page);
5615 if (csum != *private) { 5692 if (csum != *private) {
5616 printk(KERN_ERR "btrfs csum failed ino %lu off" 5693 printk(KERN_ERR "btrfs csum failed ino %llu off"
5617 " %llu csum %u private %u\n", 5694 " %llu csum %u private %u\n",
5618 inode->i_ino, (unsigned long long)start, 5695 (unsigned long long)btrfs_ino(inode),
5696 (unsigned long long)start,
5619 csum, *private); 5697 csum, *private);
5620 err = -EIO; 5698 err = -EIO;
5621 } 5699 }
@@ -5762,9 +5840,9 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
5762 struct btrfs_dio_private *dip = bio->bi_private; 5840 struct btrfs_dio_private *dip = bio->bi_private;
5763 5841
5764 if (err) { 5842 if (err) {
5765 printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu " 5843 printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu "
5766 "sector %#Lx len %u err no %d\n", 5844 "sector %#Lx len %u err no %d\n",
5767 dip->inode->i_ino, bio->bi_rw, 5845 (unsigned long long)btrfs_ino(dip->inode), bio->bi_rw,
5768 (unsigned long long)bio->bi_sector, bio->bi_size, err); 5846 (unsigned long long)bio->bi_sector, bio->bi_size, err);
5769 dip->errors = 1; 5847 dip->errors = 1;
5770 5848
@@ -6607,6 +6685,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
6607 ei->dummy_inode = 0; 6685 ei->dummy_inode = 0;
6608 ei->force_compress = BTRFS_COMPRESS_NONE; 6686 ei->force_compress = BTRFS_COMPRESS_NONE;
6609 6687
6688 ei->delayed_node = NULL;
6689
6610 inode = &ei->vfs_inode; 6690 inode = &ei->vfs_inode;
6611 extent_map_tree_init(&ei->extent_tree); 6691 extent_map_tree_init(&ei->extent_tree);
6612 extent_io_tree_init(&ei->io_tree, &inode->i_data); 6692 extent_io_tree_init(&ei->io_tree, &inode->i_data);
@@ -6674,8 +6754,8 @@ void btrfs_destroy_inode(struct inode *inode)
6674 6754
6675 spin_lock(&root->orphan_lock); 6755 spin_lock(&root->orphan_lock);
6676 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 6756 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
6677 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", 6757 printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n",
6678 inode->i_ino); 6758 (unsigned long long)btrfs_ino(inode));
6679 list_del_init(&BTRFS_I(inode)->i_orphan); 6759 list_del_init(&BTRFS_I(inode)->i_orphan);
6680 } 6760 }
6681 spin_unlock(&root->orphan_lock); 6761 spin_unlock(&root->orphan_lock);
@@ -6697,6 +6777,7 @@ void btrfs_destroy_inode(struct inode *inode)
6697 inode_tree_del(inode); 6777 inode_tree_del(inode);
6698 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); 6778 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
6699free: 6779free:
6780 btrfs_remove_delayed_node(inode);
6700 call_rcu(&inode->i_rcu, btrfs_i_callback); 6781 call_rcu(&inode->i_rcu, btrfs_i_callback);
6701} 6782}
6702 6783
@@ -6705,7 +6786,7 @@ int btrfs_drop_inode(struct inode *inode)
6705 struct btrfs_root *root = BTRFS_I(inode)->root; 6786 struct btrfs_root *root = BTRFS_I(inode)->root;
6706 6787
6707 if (btrfs_root_refs(&root->root_item) == 0 && 6788 if (btrfs_root_refs(&root->root_item) == 0 &&
6708 root != root->fs_info->tree_root) 6789 !is_free_space_inode(root, inode))
6709 return 1; 6790 return 1;
6710 else 6791 else
6711 return generic_drop_inode(inode); 6792 return generic_drop_inode(inode);
@@ -6808,38 +6889,39 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6808 struct btrfs_trans_handle *trans; 6889 struct btrfs_trans_handle *trans;
6809 struct btrfs_root *root = BTRFS_I(old_dir)->root; 6890 struct btrfs_root *root = BTRFS_I(old_dir)->root;
6810 struct btrfs_root *dest = BTRFS_I(new_dir)->root; 6891 struct btrfs_root *dest = BTRFS_I(new_dir)->root;
6811 struct inode *newinode = new_dentry->d_inode; 6892 struct inode *new_inode = new_dentry->d_inode;
6812 struct inode *old_inode = old_dentry->d_inode; 6893 struct inode *old_inode = old_dentry->d_inode;
6813 struct timespec ctime = CURRENT_TIME; 6894 struct timespec ctime = CURRENT_TIME;
6814 u64 index = 0; 6895 u64 index = 0;
6815 u64 root_objectid; 6896 u64 root_objectid;
6816 int ret; 6897 int ret;
6898 u64 old_ino = btrfs_ino(old_inode);
6817 6899
6818 if (new_dir->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) 6900 if (btrfs_ino(new_dir) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
6819 return -EPERM; 6901 return -EPERM;
6820 6902
6821 /* we only allow rename subvolume link between subvolumes */ 6903 /* we only allow rename subvolume link between subvolumes */
6822 if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) 6904 if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
6823 return -EXDEV; 6905 return -EXDEV;
6824 6906
6825 if (old_inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID || 6907 if (old_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID ||
6826 (newinode && newinode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) 6908 (new_inode && btrfs_ino(new_inode) == BTRFS_FIRST_FREE_OBJECTID))
6827 return -ENOTEMPTY; 6909 return -ENOTEMPTY;
6828 6910
6829 if (S_ISDIR(old_inode->i_mode) && newinode && 6911 if (S_ISDIR(old_inode->i_mode) && new_inode &&
6830 newinode->i_size > BTRFS_EMPTY_DIR_SIZE) 6912 new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
6831 return -ENOTEMPTY; 6913 return -ENOTEMPTY;
6832 /* 6914 /*
6833 * we're using rename to replace one file with another. 6915 * we're using rename to replace one file with another.
6834 * and the replacement file is large. Start IO on it now so 6916 * and the replacement file is large. Start IO on it now so
6835 * we don't add too much work to the end of the transaction 6917 * we don't add too much work to the end of the transaction
6836 */ 6918 */
6837 if (newinode && S_ISREG(old_inode->i_mode) && newinode->i_size && 6919 if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size &&
6838 old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) 6920 old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
6839 filemap_flush(old_inode->i_mapping); 6921 filemap_flush(old_inode->i_mapping);
6840 6922
6841 /* close the racy window with snapshot create/destroy ioctl */ 6923 /* close the racy window with snapshot create/destroy ioctl */
6842 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 6924 if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
6843 down_read(&root->fs_info->subvol_sem); 6925 down_read(&root->fs_info->subvol_sem);
6844 /* 6926 /*
6845 * We want to reserve the absolute worst case amount of items. So if 6927 * We want to reserve the absolute worst case amount of items. So if
@@ -6864,15 +6946,15 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6864 if (ret) 6946 if (ret)
6865 goto out_fail; 6947 goto out_fail;
6866 6948
6867 if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { 6949 if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
6868 /* force full log commit if subvolume involved. */ 6950 /* force full log commit if subvolume involved. */
6869 root->fs_info->last_trans_log_full_commit = trans->transid; 6951 root->fs_info->last_trans_log_full_commit = trans->transid;
6870 } else { 6952 } else {
6871 ret = btrfs_insert_inode_ref(trans, dest, 6953 ret = btrfs_insert_inode_ref(trans, dest,
6872 new_dentry->d_name.name, 6954 new_dentry->d_name.name,
6873 new_dentry->d_name.len, 6955 new_dentry->d_name.len,
6874 old_inode->i_ino, 6956 old_ino,
6875 new_dir->i_ino, index); 6957 btrfs_ino(new_dir), index);
6876 if (ret) 6958 if (ret)
6877 goto out_fail; 6959 goto out_fail;
6878 /* 6960 /*
@@ -6888,10 +6970,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6888 * make sure the inode gets flushed if it is replacing 6970 * make sure the inode gets flushed if it is replacing
6889 * something. 6971 * something.
6890 */ 6972 */
6891 if (newinode && newinode->i_size && 6973 if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode))
6892 old_inode && S_ISREG(old_inode->i_mode)) {
6893 btrfs_add_ordered_operation(trans, root, old_inode); 6974 btrfs_add_ordered_operation(trans, root, old_inode);
6894 }
6895 6975
6896 old_dir->i_ctime = old_dir->i_mtime = ctime; 6976 old_dir->i_ctime = old_dir->i_mtime = ctime;
6897 new_dir->i_ctime = new_dir->i_mtime = ctime; 6977 new_dir->i_ctime = new_dir->i_mtime = ctime;
@@ -6900,7 +6980,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6900 if (old_dentry->d_parent != new_dentry->d_parent) 6980 if (old_dentry->d_parent != new_dentry->d_parent)
6901 btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); 6981 btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
6902 6982
6903 if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { 6983 if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
6904 root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; 6984 root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
6905 ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid, 6985 ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid,
6906 old_dentry->d_name.name, 6986 old_dentry->d_name.name,
@@ -6915,16 +6995,16 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6915 } 6995 }
6916 BUG_ON(ret); 6996 BUG_ON(ret);
6917 6997
6918 if (newinode) { 6998 if (new_inode) {
6919 newinode->i_ctime = CURRENT_TIME; 6999 new_inode->i_ctime = CURRENT_TIME;
6920 if (unlikely(newinode->i_ino == 7000 if (unlikely(btrfs_ino(new_inode) ==
6921 BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { 7001 BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
6922 root_objectid = BTRFS_I(newinode)->location.objectid; 7002 root_objectid = BTRFS_I(new_inode)->location.objectid;
6923 ret = btrfs_unlink_subvol(trans, dest, new_dir, 7003 ret = btrfs_unlink_subvol(trans, dest, new_dir,
6924 root_objectid, 7004 root_objectid,
6925 new_dentry->d_name.name, 7005 new_dentry->d_name.name,
6926 new_dentry->d_name.len); 7006 new_dentry->d_name.len);
6927 BUG_ON(newinode->i_nlink == 0); 7007 BUG_ON(new_inode->i_nlink == 0);
6928 } else { 7008 } else {
6929 ret = btrfs_unlink_inode(trans, dest, new_dir, 7009 ret = btrfs_unlink_inode(trans, dest, new_dir,
6930 new_dentry->d_inode, 7010 new_dentry->d_inode,
@@ -6932,7 +7012,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6932 new_dentry->d_name.len); 7012 new_dentry->d_name.len);
6933 } 7013 }
6934 BUG_ON(ret); 7014 BUG_ON(ret);
6935 if (newinode->i_nlink == 0) { 7015 if (new_inode->i_nlink == 0) {
6936 ret = btrfs_orphan_add(trans, new_dentry->d_inode); 7016 ret = btrfs_orphan_add(trans, new_dentry->d_inode);
6937 BUG_ON(ret); 7017 BUG_ON(ret);
6938 } 7018 }
@@ -6945,7 +7025,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6945 new_dentry->d_name.len, 0, index); 7025 new_dentry->d_name.len, 0, index);
6946 BUG_ON(ret); 7026 BUG_ON(ret);
6947 7027
6948 if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { 7028 if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
6949 struct dentry *parent = dget_parent(new_dentry); 7029 struct dentry *parent = dget_parent(new_dentry);
6950 btrfs_log_new_name(trans, old_inode, old_dir, parent); 7030 btrfs_log_new_name(trans, old_inode, old_dir, parent);
6951 dput(parent); 7031 dput(parent);
@@ -6954,7 +7034,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6954out_fail: 7034out_fail:
6955 btrfs_end_transaction_throttle(trans, root); 7035 btrfs_end_transaction_throttle(trans, root);
6956out_notrans: 7036out_notrans:
6957 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 7037 if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
6958 up_read(&root->fs_info->subvol_sem); 7038 up_read(&root->fs_info->subvol_sem);
6959 7039
6960 return ret; 7040 return ret;
@@ -7031,9 +7111,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7031 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) 7111 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
7032 return -ENAMETOOLONG; 7112 return -ENAMETOOLONG;
7033 7113
7034 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
7035 if (err)
7036 return err;
7037 /* 7114 /*
7038 * 2 items for inode item and ref 7115 * 2 items for inode item and ref
7039 * 2 items for dir items 7116 * 2 items for dir items
@@ -7045,8 +7122,12 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7045 7122
7046 btrfs_set_trans_block_group(trans, dir); 7123 btrfs_set_trans_block_group(trans, dir);
7047 7124
7125 err = btrfs_find_free_ino(root, &objectid);
7126 if (err)
7127 goto out_unlock;
7128
7048 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 7129 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
7049 dentry->d_name.len, dir->i_ino, objectid, 7130 dentry->d_name.len, btrfs_ino(dir), objectid,
7050 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, 7131 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO,
7051 &index); 7132 &index);
7052 if (IS_ERR(inode)) { 7133 if (IS_ERR(inode)) {
@@ -7078,7 +7159,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7078 7159
7079 path = btrfs_alloc_path(); 7160 path = btrfs_alloc_path();
7080 BUG_ON(!path); 7161 BUG_ON(!path);
7081 key.objectid = inode->i_ino; 7162 key.objectid = btrfs_ino(inode);
7082 key.offset = 0; 7163 key.offset = 0;
7083 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); 7164 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
7084 datasize = btrfs_file_extent_calc_inline_size(name_len); 7165 datasize = btrfs_file_extent_calc_inline_size(name_len);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d11fc6548e1..ed8c055ab70 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -50,6 +50,7 @@
50#include "print-tree.h" 50#include "print-tree.h"
51#include "volumes.h" 51#include "volumes.h"
52#include "locking.h" 52#include "locking.h"
53#include "inode-map.h"
53 54
54/* Mask out flags that are inappropriate for the given type of inode. */ 55/* Mask out flags that are inappropriate for the given type of inode. */
55static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) 56static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
@@ -81,6 +82,13 @@ static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
81 iflags |= FS_NOATIME_FL; 82 iflags |= FS_NOATIME_FL;
82 if (flags & BTRFS_INODE_DIRSYNC) 83 if (flags & BTRFS_INODE_DIRSYNC)
83 iflags |= FS_DIRSYNC_FL; 84 iflags |= FS_DIRSYNC_FL;
85 if (flags & BTRFS_INODE_NODATACOW)
86 iflags |= FS_NOCOW_FL;
87
88 if ((flags & BTRFS_INODE_COMPRESS) && !(flags & BTRFS_INODE_NOCOMPRESS))
89 iflags |= FS_COMPR_FL;
90 else if (flags & BTRFS_INODE_NOCOMPRESS)
91 iflags |= FS_NOCOMP_FL;
84 92
85 return iflags; 93 return iflags;
86} 94}
@@ -144,16 +152,13 @@ static int check_flags(unsigned int flags)
144 if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ 152 if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
145 FS_NOATIME_FL | FS_NODUMP_FL | \ 153 FS_NOATIME_FL | FS_NODUMP_FL | \
146 FS_SYNC_FL | FS_DIRSYNC_FL | \ 154 FS_SYNC_FL | FS_DIRSYNC_FL | \
147 FS_NOCOMP_FL | FS_COMPR_FL | \ 155 FS_NOCOMP_FL | FS_COMPR_FL |
148 FS_NOCOW_FL | FS_COW_FL)) 156 FS_NOCOW_FL))
149 return -EOPNOTSUPP; 157 return -EOPNOTSUPP;
150 158
151 if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL)) 159 if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
152 return -EINVAL; 160 return -EINVAL;
153 161
154 if ((flags & FS_NOCOW_FL) && (flags & FS_COW_FL))
155 return -EINVAL;
156
157 return 0; 162 return 0;
158} 163}
159 164
@@ -218,6 +223,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
218 ip->flags |= BTRFS_INODE_DIRSYNC; 223 ip->flags |= BTRFS_INODE_DIRSYNC;
219 else 224 else
220 ip->flags &= ~BTRFS_INODE_DIRSYNC; 225 ip->flags &= ~BTRFS_INODE_DIRSYNC;
226 if (flags & FS_NOCOW_FL)
227 ip->flags |= BTRFS_INODE_NODATACOW;
228 else
229 ip->flags &= ~BTRFS_INODE_NODATACOW;
221 230
222 /* 231 /*
223 * The COMPRESS flag can only be changed by users, while the NOCOMPRESS 232 * The COMPRESS flag can only be changed by users, while the NOCOMPRESS
@@ -230,11 +239,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
230 } else if (flags & FS_COMPR_FL) { 239 } else if (flags & FS_COMPR_FL) {
231 ip->flags |= BTRFS_INODE_COMPRESS; 240 ip->flags |= BTRFS_INODE_COMPRESS;
232 ip->flags &= ~BTRFS_INODE_NOCOMPRESS; 241 ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
242 } else {
243 ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
233 } 244 }
234 if (flags & FS_NOCOW_FL)
235 ip->flags |= BTRFS_INODE_NODATACOW;
236 else if (flags & FS_COW_FL)
237 ip->flags &= ~BTRFS_INODE_NODATACOW;
238 245
239 trans = btrfs_join_transaction(root, 1); 246 trans = btrfs_join_transaction(root, 1);
240 BUG_ON(IS_ERR(trans)); 247 BUG_ON(IS_ERR(trans));
@@ -323,8 +330,7 @@ static noinline int create_subvol(struct btrfs_root *root,
323 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; 330 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
324 u64 index = 0; 331 u64 index = 0;
325 332
326 ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root, 333 ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid);
327 0, &objectid);
328 if (ret) { 334 if (ret) {
329 dput(parent); 335 dput(parent);
330 return ret; 336 return ret;
@@ -416,7 +422,7 @@ static noinline int create_subvol(struct btrfs_root *root,
416 BUG_ON(ret); 422 BUG_ON(ret);
417 423
418 ret = btrfs_insert_dir_item(trans, root, 424 ret = btrfs_insert_dir_item(trans, root,
419 name, namelen, dir->i_ino, &key, 425 name, namelen, dir, &key,
420 BTRFS_FT_DIR, index); 426 BTRFS_FT_DIR, index);
421 if (ret) 427 if (ret)
422 goto fail; 428 goto fail;
@@ -427,7 +433,7 @@ static noinline int create_subvol(struct btrfs_root *root,
427 433
428 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, 434 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
429 objectid, root->root_key.objectid, 435 objectid, root->root_key.objectid,
430 dir->i_ino, index, name, namelen); 436 btrfs_ino(dir), index, name, namelen);
431 437
432 BUG_ON(ret); 438 BUG_ON(ret);
433 439
@@ -1123,7 +1129,7 @@ static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
1123 int ret = 0; 1129 int ret = 0;
1124 u64 flags = 0; 1130 u64 flags = 0;
1125 1131
1126 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) 1132 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID)
1127 return -EINVAL; 1133 return -EINVAL;
1128 1134
1129 down_read(&root->fs_info->subvol_sem); 1135 down_read(&root->fs_info->subvol_sem);
@@ -1150,7 +1156,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1150 if (root->fs_info->sb->s_flags & MS_RDONLY) 1156 if (root->fs_info->sb->s_flags & MS_RDONLY)
1151 return -EROFS; 1157 return -EROFS;
1152 1158
1153 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) 1159 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID)
1154 return -EINVAL; 1160 return -EINVAL;
1155 1161
1156 if (copy_from_user(&flags, arg, sizeof(flags))) 1162 if (copy_from_user(&flags, arg, sizeof(flags)))
@@ -1633,7 +1639,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
1633 goto out_dput; 1639 goto out_dput;
1634 } 1640 }
1635 1641
1636 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { 1642 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
1637 err = -EINVAL; 1643 err = -EINVAL;
1638 goto out_dput; 1644 goto out_dput;
1639 } 1645 }
@@ -1919,7 +1925,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1919 } 1925 }
1920 1926
1921 /* clone data */ 1927 /* clone data */
1922 key.objectid = src->i_ino; 1928 key.objectid = btrfs_ino(src);
1923 key.type = BTRFS_EXTENT_DATA_KEY; 1929 key.type = BTRFS_EXTENT_DATA_KEY;
1924 key.offset = 0; 1930 key.offset = 0;
1925 1931
@@ -1946,7 +1952,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1946 1952
1947 btrfs_item_key_to_cpu(leaf, &key, slot); 1953 btrfs_item_key_to_cpu(leaf, &key, slot);
1948 if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY || 1954 if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
1949 key.objectid != src->i_ino) 1955 key.objectid != btrfs_ino(src))
1950 break; 1956 break;
1951 1957
1952 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { 1958 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
@@ -1989,7 +1995,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1989 goto next; 1995 goto next;
1990 1996
1991 memcpy(&new_key, &key, sizeof(new_key)); 1997 memcpy(&new_key, &key, sizeof(new_key));
1992 new_key.objectid = inode->i_ino; 1998 new_key.objectid = btrfs_ino(inode);
1993 if (off <= key.offset) 1999 if (off <= key.offset)
1994 new_key.offset = key.offset + destoff - off; 2000 new_key.offset = key.offset + destoff - off;
1995 else 2001 else
@@ -2043,7 +2049,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2043 ret = btrfs_inc_extent_ref(trans, root, 2049 ret = btrfs_inc_extent_ref(trans, root,
2044 disko, diskl, 0, 2050 disko, diskl, 0,
2045 root->root_key.objectid, 2051 root->root_key.objectid,
2046 inode->i_ino, 2052 btrfs_ino(inode),
2047 new_key.offset - datao); 2053 new_key.offset - datao);
2048 BUG_ON(ret); 2054 BUG_ON(ret);
2049 } 2055 }
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index f726e72dd36..051992c7fcc 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -30,6 +30,7 @@
30#include "btrfs_inode.h" 30#include "btrfs_inode.h"
31#include "async-thread.h" 31#include "async-thread.h"
32#include "free-space-cache.h" 32#include "free-space-cache.h"
33#include "inode-map.h"
33 34
34/* 35/*
35 * backref_node, mapping_node and tree_block start with this 36 * backref_node, mapping_node and tree_block start with this
@@ -1409,9 +1410,9 @@ again:
1409 prev = node; 1410 prev = node;
1410 entry = rb_entry(node, struct btrfs_inode, rb_node); 1411 entry = rb_entry(node, struct btrfs_inode, rb_node);
1411 1412
1412 if (objectid < entry->vfs_inode.i_ino) 1413 if (objectid < btrfs_ino(&entry->vfs_inode))
1413 node = node->rb_left; 1414 node = node->rb_left;
1414 else if (objectid > entry->vfs_inode.i_ino) 1415 else if (objectid > btrfs_ino(&entry->vfs_inode))
1415 node = node->rb_right; 1416 node = node->rb_right;
1416 else 1417 else
1417 break; 1418 break;
@@ -1419,7 +1420,7 @@ again:
1419 if (!node) { 1420 if (!node) {
1420 while (prev) { 1421 while (prev) {
1421 entry = rb_entry(prev, struct btrfs_inode, rb_node); 1422 entry = rb_entry(prev, struct btrfs_inode, rb_node);
1422 if (objectid <= entry->vfs_inode.i_ino) { 1423 if (objectid <= btrfs_ino(&entry->vfs_inode)) {
1423 node = prev; 1424 node = prev;
1424 break; 1425 break;
1425 } 1426 }
@@ -1434,7 +1435,7 @@ again:
1434 return inode; 1435 return inode;
1435 } 1436 }
1436 1437
1437 objectid = entry->vfs_inode.i_ino + 1; 1438 objectid = btrfs_ino(&entry->vfs_inode) + 1;
1438 if (cond_resched_lock(&root->inode_lock)) 1439 if (cond_resched_lock(&root->inode_lock))
1439 goto again; 1440 goto again;
1440 1441
@@ -1470,7 +1471,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
1470 return -ENOMEM; 1471 return -ENOMEM;
1471 1472
1472 bytenr -= BTRFS_I(reloc_inode)->index_cnt; 1473 bytenr -= BTRFS_I(reloc_inode)->index_cnt;
1473 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, 1474 ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(reloc_inode),
1474 bytenr, 0); 1475 bytenr, 0);
1475 if (ret < 0) 1476 if (ret < 0)
1476 goto out; 1477 goto out;
@@ -1558,11 +1559,11 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
1558 if (first) { 1559 if (first) {
1559 inode = find_next_inode(root, key.objectid); 1560 inode = find_next_inode(root, key.objectid);
1560 first = 0; 1561 first = 0;
1561 } else if (inode && inode->i_ino < key.objectid) { 1562 } else if (inode && btrfs_ino(inode) < key.objectid) {
1562 btrfs_add_delayed_iput(inode); 1563 btrfs_add_delayed_iput(inode);
1563 inode = find_next_inode(root, key.objectid); 1564 inode = find_next_inode(root, key.objectid);
1564 } 1565 }
1565 if (inode && inode->i_ino == key.objectid) { 1566 if (inode && btrfs_ino(inode) == key.objectid) {
1566 end = key.offset + 1567 end = key.offset +
1567 btrfs_file_extent_num_bytes(leaf, fi); 1568 btrfs_file_extent_num_bytes(leaf, fi);
1568 WARN_ON(!IS_ALIGNED(key.offset, 1569 WARN_ON(!IS_ALIGNED(key.offset,
@@ -1893,6 +1894,7 @@ static int invalidate_extent_cache(struct btrfs_root *root,
1893 struct inode *inode = NULL; 1894 struct inode *inode = NULL;
1894 u64 objectid; 1895 u64 objectid;
1895 u64 start, end; 1896 u64 start, end;
1897 u64 ino;
1896 1898
1897 objectid = min_key->objectid; 1899 objectid = min_key->objectid;
1898 while (1) { 1900 while (1) {
@@ -1905,17 +1907,18 @@ static int invalidate_extent_cache(struct btrfs_root *root,
1905 inode = find_next_inode(root, objectid); 1907 inode = find_next_inode(root, objectid);
1906 if (!inode) 1908 if (!inode)
1907 break; 1909 break;
1910 ino = btrfs_ino(inode);
1908 1911
1909 if (inode->i_ino > max_key->objectid) { 1912 if (ino > max_key->objectid) {
1910 iput(inode); 1913 iput(inode);
1911 break; 1914 break;
1912 } 1915 }
1913 1916
1914 objectid = inode->i_ino + 1; 1917 objectid = ino + 1;
1915 if (!S_ISREG(inode->i_mode)) 1918 if (!S_ISREG(inode->i_mode))
1916 continue; 1919 continue;
1917 1920
1918 if (unlikely(min_key->objectid == inode->i_ino)) { 1921 if (unlikely(min_key->objectid == ino)) {
1919 if (min_key->type > BTRFS_EXTENT_DATA_KEY) 1922 if (min_key->type > BTRFS_EXTENT_DATA_KEY)
1920 continue; 1923 continue;
1921 if (min_key->type < BTRFS_EXTENT_DATA_KEY) 1924 if (min_key->type < BTRFS_EXTENT_DATA_KEY)
@@ -1928,7 +1931,7 @@ static int invalidate_extent_cache(struct btrfs_root *root,
1928 start = 0; 1931 start = 0;
1929 } 1932 }
1930 1933
1931 if (unlikely(max_key->objectid == inode->i_ino)) { 1934 if (unlikely(max_key->objectid == ino)) {
1932 if (max_key->type < BTRFS_EXTENT_DATA_KEY) 1935 if (max_key->type < BTRFS_EXTENT_DATA_KEY)
1933 continue; 1936 continue;
1934 if (max_key->type > BTRFS_EXTENT_DATA_KEY) { 1937 if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
@@ -3897,7 +3900,7 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
3897 if (IS_ERR(trans)) 3900 if (IS_ERR(trans))
3898 return ERR_CAST(trans); 3901 return ERR_CAST(trans);
3899 3902
3900 err = btrfs_find_free_objectid(trans, root, objectid, &objectid); 3903 err = btrfs_find_free_objectid(root, &objectid);
3901 if (err) 3904 if (err)
3902 goto out; 3905 goto out;
3903 3906
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 3e28521643f..fb72e2bea88 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -40,6 +40,7 @@
40#include <linux/magic.h> 40#include <linux/magic.h>
41#include <linux/slab.h> 41#include <linux/slab.h>
42#include "compat.h" 42#include "compat.h"
43#include "delayed-inode.h"
43#include "ctree.h" 44#include "ctree.h"
44#include "disk-io.h" 45#include "disk-io.h"
45#include "transaction.h" 46#include "transaction.h"
@@ -1206,10 +1207,14 @@ static int __init init_btrfs_fs(void)
1206 if (err) 1207 if (err)
1207 goto free_extent_io; 1208 goto free_extent_io;
1208 1209
1209 err = btrfs_interface_init(); 1210 err = btrfs_delayed_inode_init();
1210 if (err) 1211 if (err)
1211 goto free_extent_map; 1212 goto free_extent_map;
1212 1213
1214 err = btrfs_interface_init();
1215 if (err)
1216 goto free_delayed_inode;
1217
1213 err = register_filesystem(&btrfs_fs_type); 1218 err = register_filesystem(&btrfs_fs_type);
1214 if (err) 1219 if (err)
1215 goto unregister_ioctl; 1220 goto unregister_ioctl;
@@ -1219,6 +1224,8 @@ static int __init init_btrfs_fs(void)
1219 1224
1220unregister_ioctl: 1225unregister_ioctl:
1221 btrfs_interface_exit(); 1226 btrfs_interface_exit();
1227free_delayed_inode:
1228 btrfs_delayed_inode_exit();
1222free_extent_map: 1229free_extent_map:
1223 extent_map_exit(); 1230 extent_map_exit();
1224free_extent_io: 1231free_extent_io:
@@ -1235,6 +1242,7 @@ free_sysfs:
1235static void __exit exit_btrfs_fs(void) 1242static void __exit exit_btrfs_fs(void)
1236{ 1243{
1237 btrfs_destroy_cachep(); 1244 btrfs_destroy_cachep();
1245 btrfs_delayed_inode_exit();
1238 extent_map_exit(); 1246 extent_map_exit();
1239 extent_io_exit(); 1247 extent_io_exit();
1240 btrfs_interface_exit(); 1248 btrfs_interface_exit();
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index ab9633fd72a..c3c223ae669 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -174,18 +174,6 @@ static const struct sysfs_ops btrfs_root_attr_ops = {
174 .store = btrfs_root_attr_store, 174 .store = btrfs_root_attr_store,
175}; 175};
176 176
177static struct kobj_type btrfs_root_ktype = {
178 .default_attrs = btrfs_root_attrs,
179 .sysfs_ops = &btrfs_root_attr_ops,
180 .release = btrfs_root_release,
181};
182
183static struct kobj_type btrfs_super_ktype = {
184 .default_attrs = btrfs_super_attrs,
185 .sysfs_ops = &btrfs_super_attr_ops,
186 .release = btrfs_super_release,
187};
188
189/* /sys/fs/btrfs/ entry */ 177/* /sys/fs/btrfs/ entry */
190static struct kset *btrfs_kset; 178static struct kset *btrfs_kset;
191 179
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 211aceeb9ea..33679fc710c 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -27,6 +27,7 @@
27#include "transaction.h" 27#include "transaction.h"
28#include "locking.h" 28#include "locking.h"
29#include "tree-log.h" 29#include "tree-log.h"
30#include "inode-map.h"
30 31
31#define BTRFS_ROOT_TRANS_TAG 0 32#define BTRFS_ROOT_TRANS_TAG 0
32 33
@@ -443,19 +444,40 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
443int btrfs_end_transaction(struct btrfs_trans_handle *trans, 444int btrfs_end_transaction(struct btrfs_trans_handle *trans,
444 struct btrfs_root *root) 445 struct btrfs_root *root)
445{ 446{
446 return __btrfs_end_transaction(trans, root, 0, 1); 447 int ret;
448
449 ret = __btrfs_end_transaction(trans, root, 0, 1);
450 if (ret)
451 return ret;
452 return 0;
447} 453}
448 454
449int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 455int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
450 struct btrfs_root *root) 456 struct btrfs_root *root)
451{ 457{
452 return __btrfs_end_transaction(trans, root, 1, 1); 458 int ret;
459
460 ret = __btrfs_end_transaction(trans, root, 1, 1);
461 if (ret)
462 return ret;
463 return 0;
453} 464}
454 465
455int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, 466int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
456 struct btrfs_root *root) 467 struct btrfs_root *root)
457{ 468{
458 return __btrfs_end_transaction(trans, root, 0, 0); 469 int ret;
470
471 ret = __btrfs_end_transaction(trans, root, 0, 0);
472 if (ret)
473 return ret;
474 return 0;
475}
476
477int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
478 struct btrfs_root *root)
479{
480 return __btrfs_end_transaction(trans, root, 1, 1);
459} 481}
460 482
461/* 483/*
@@ -716,8 +738,14 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
716 btrfs_update_reloc_root(trans, root); 738 btrfs_update_reloc_root(trans, root);
717 btrfs_orphan_commit_root(trans, root); 739 btrfs_orphan_commit_root(trans, root);
718 740
741 btrfs_save_ino_cache(root, trans);
742
719 if (root->commit_root != root->node) { 743 if (root->commit_root != root->node) {
744 mutex_lock(&root->fs_commit_mutex);
720 switch_commit_root(root); 745 switch_commit_root(root);
746 btrfs_unpin_free_ino(root);
747 mutex_unlock(&root->fs_commit_mutex);
748
721 btrfs_set_root_node(&root->root_item, 749 btrfs_set_root_node(&root->root_item,
722 root->node); 750 root->node);
723 } 751 }
@@ -795,7 +823,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
795 goto fail; 823 goto fail;
796 } 824 }
797 825
798 ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); 826 ret = btrfs_find_free_objectid(tree_root, &objectid);
799 if (ret) { 827 if (ret) {
800 pending->error = ret; 828 pending->error = ret;
801 goto fail; 829 goto fail;
@@ -832,7 +860,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
832 BUG_ON(ret); 860 BUG_ON(ret);
833 ret = btrfs_insert_dir_item(trans, parent_root, 861 ret = btrfs_insert_dir_item(trans, parent_root,
834 dentry->d_name.name, dentry->d_name.len, 862 dentry->d_name.name, dentry->d_name.len,
835 parent_inode->i_ino, &key, 863 parent_inode, &key,
836 BTRFS_FT_DIR, index); 864 BTRFS_FT_DIR, index);
837 BUG_ON(ret); 865 BUG_ON(ret);
838 866
@@ -874,7 +902,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
874 */ 902 */
875 ret = btrfs_add_root_ref(trans, tree_root, objectid, 903 ret = btrfs_add_root_ref(trans, tree_root, objectid,
876 parent_root->root_key.objectid, 904 parent_root->root_key.objectid,
877 parent_inode->i_ino, index, 905 btrfs_ino(parent_inode), index,
878 dentry->d_name.name, dentry->d_name.len); 906 dentry->d_name.name, dentry->d_name.len);
879 BUG_ON(ret); 907 BUG_ON(ret);
880 dput(parent); 908 dput(parent);
@@ -902,6 +930,14 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
902 int ret; 930 int ret;
903 931
904 list_for_each_entry(pending, head, list) { 932 list_for_each_entry(pending, head, list) {
933 /*
934 * We must deal with the delayed items before creating
935 * snapshots, or we will create a snapthot with inconsistent
936 * information.
937 */
938 ret = btrfs_run_delayed_items(trans, fs_info->fs_root);
939 BUG_ON(ret);
940
905 ret = create_pending_snapshot(trans, fs_info, pending); 941 ret = create_pending_snapshot(trans, fs_info, pending);
906 BUG_ON(ret); 942 BUG_ON(ret);
907 } 943 }
@@ -1155,6 +1191,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1155 BUG_ON(ret); 1191 BUG_ON(ret);
1156 } 1192 }
1157 1193
1194 ret = btrfs_run_delayed_items(trans, root);
1195 BUG_ON(ret);
1196
1158 /* 1197 /*
1159 * rename don't use btrfs_join_transaction, so, once we 1198 * rename don't use btrfs_join_transaction, so, once we
1160 * set the transaction to blocked above, we aren't going 1199 * set the transaction to blocked above, we aren't going
@@ -1181,6 +1220,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1181 ret = create_pending_snapshots(trans, root->fs_info); 1220 ret = create_pending_snapshots(trans, root->fs_info);
1182 BUG_ON(ret); 1221 BUG_ON(ret);
1183 1222
1223 ret = btrfs_run_delayed_items(trans, root);
1224 BUG_ON(ret);
1225
1184 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 1226 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1185 BUG_ON(ret); 1227 BUG_ON(ret);
1186 1228
@@ -1297,6 +1339,8 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
1297 root = list_entry(list.next, struct btrfs_root, root_list); 1339 root = list_entry(list.next, struct btrfs_root, root_list);
1298 list_del(&root->root_list); 1340 list_del(&root->root_list);
1299 1341
1342 btrfs_kill_all_delayed_nodes(root);
1343
1300 if (btrfs_header_backref_rev(root->node) < 1344 if (btrfs_header_backref_rev(root->node) <
1301 BTRFS_MIXED_BACKREF_REV) 1345 BTRFS_MIXED_BACKREF_REV)
1302 btrfs_drop_snapshot(root, NULL, 0); 1346 btrfs_drop_snapshot(root, NULL, 0);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 000a41008c3..804c88639e5 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -112,6 +112,8 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
112 int wait_for_unblock); 112 int wait_for_unblock);
113int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 113int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
114 struct btrfs_root *root); 114 struct btrfs_root *root);
115int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
116 struct btrfs_root *root);
115int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, 117int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
116 struct btrfs_root *root); 118 struct btrfs_root *root);
117void btrfs_throttle(struct btrfs_root *root); 119void btrfs_throttle(struct btrfs_root *root);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c599e8c2a53..a794b9f6013 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -519,7 +519,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
519 * file. This must be done before the btrfs_drop_extents run 519 * file. This must be done before the btrfs_drop_extents run
520 * so we don't try to drop this extent. 520 * so we don't try to drop this extent.
521 */ 521 */
522 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, 522 ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode),
523 start, 0); 523 start, 0);
524 524
525 if (ret == 0 && 525 if (ret == 0 &&
@@ -832,7 +832,7 @@ again:
832 read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen); 832 read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen);
833 833
834 /* if we already have a perfect match, we're done */ 834 /* if we already have a perfect match, we're done */
835 if (inode_in_dir(root, path, dir->i_ino, inode->i_ino, 835 if (inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode),
836 btrfs_inode_ref_index(eb, ref), 836 btrfs_inode_ref_index(eb, ref),
837 name, namelen)) { 837 name, namelen)) {
838 goto out; 838 goto out;
@@ -960,8 +960,9 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
960 unsigned long ptr; 960 unsigned long ptr;
961 unsigned long ptr_end; 961 unsigned long ptr_end;
962 int name_len; 962 int name_len;
963 u64 ino = btrfs_ino(inode);
963 964
964 key.objectid = inode->i_ino; 965 key.objectid = ino;
965 key.type = BTRFS_INODE_REF_KEY; 966 key.type = BTRFS_INODE_REF_KEY;
966 key.offset = (u64)-1; 967 key.offset = (u64)-1;
967 968
@@ -980,7 +981,7 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
980 } 981 }
981 btrfs_item_key_to_cpu(path->nodes[0], &key, 982 btrfs_item_key_to_cpu(path->nodes[0], &key,
982 path->slots[0]); 983 path->slots[0]);
983 if (key.objectid != inode->i_ino || 984 if (key.objectid != ino ||
984 key.type != BTRFS_INODE_REF_KEY) 985 key.type != BTRFS_INODE_REF_KEY)
985 break; 986 break;
986 ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); 987 ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
@@ -1011,10 +1012,10 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
1011 if (inode->i_nlink == 0) { 1012 if (inode->i_nlink == 0) {
1012 if (S_ISDIR(inode->i_mode)) { 1013 if (S_ISDIR(inode->i_mode)) {
1013 ret = replay_dir_deletes(trans, root, NULL, path, 1014 ret = replay_dir_deletes(trans, root, NULL, path,
1014 inode->i_ino, 1); 1015 ino, 1);
1015 BUG_ON(ret); 1016 BUG_ON(ret);
1016 } 1017 }
1017 ret = insert_orphan_item(trans, root, inode->i_ino); 1018 ret = insert_orphan_item(trans, root, ino);
1018 BUG_ON(ret); 1019 BUG_ON(ret);
1019 } 1020 }
1020 btrfs_free_path(path); 1021 btrfs_free_path(path);
@@ -2197,6 +2198,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2197 int ret; 2198 int ret;
2198 int err = 0; 2199 int err = 0;
2199 int bytes_del = 0; 2200 int bytes_del = 0;
2201 u64 dir_ino = btrfs_ino(dir);
2200 2202
2201 if (BTRFS_I(dir)->logged_trans < trans->transid) 2203 if (BTRFS_I(dir)->logged_trans < trans->transid)
2202 return 0; 2204 return 0;
@@ -2214,7 +2216,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2214 goto out_unlock; 2216 goto out_unlock;
2215 } 2217 }
2216 2218
2217 di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, 2219 di = btrfs_lookup_dir_item(trans, log, path, dir_ino,
2218 name, name_len, -1); 2220 name, name_len, -1);
2219 if (IS_ERR(di)) { 2221 if (IS_ERR(di)) {
2220 err = PTR_ERR(di); 2222 err = PTR_ERR(di);
@@ -2226,7 +2228,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2226 BUG_ON(ret); 2228 BUG_ON(ret);
2227 } 2229 }
2228 btrfs_release_path(path); 2230 btrfs_release_path(path);
2229 di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino, 2231 di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino,
2230 index, name, name_len, -1); 2232 index, name, name_len, -1);
2231 if (IS_ERR(di)) { 2233 if (IS_ERR(di)) {
2232 err = PTR_ERR(di); 2234 err = PTR_ERR(di);
@@ -2244,7 +2246,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2244 if (bytes_del) { 2246 if (bytes_del) {
2245 struct btrfs_key key; 2247 struct btrfs_key key;
2246 2248
2247 key.objectid = dir->i_ino; 2249 key.objectid = dir_ino;
2248 key.offset = 0; 2250 key.offset = 0;
2249 key.type = BTRFS_INODE_ITEM_KEY; 2251 key.type = BTRFS_INODE_ITEM_KEY;
2250 btrfs_release_path(path); 2252 btrfs_release_path(path);
@@ -2303,7 +2305,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
2303 log = root->log_root; 2305 log = root->log_root;
2304 mutex_lock(&BTRFS_I(inode)->log_mutex); 2306 mutex_lock(&BTRFS_I(inode)->log_mutex);
2305 2307
2306 ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, 2308 ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode),
2307 dirid, &index); 2309 dirid, &index);
2308 mutex_unlock(&BTRFS_I(inode)->log_mutex); 2310 mutex_unlock(&BTRFS_I(inode)->log_mutex);
2309 if (ret == -ENOSPC) { 2311 if (ret == -ENOSPC) {
@@ -2369,13 +2371,14 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2369 int nritems; 2371 int nritems;
2370 u64 first_offset = min_offset; 2372 u64 first_offset = min_offset;
2371 u64 last_offset = (u64)-1; 2373 u64 last_offset = (u64)-1;
2374 u64 ino = btrfs_ino(inode);
2372 2375
2373 log = root->log_root; 2376 log = root->log_root;
2374 max_key.objectid = inode->i_ino; 2377 max_key.objectid = ino;
2375 max_key.offset = (u64)-1; 2378 max_key.offset = (u64)-1;
2376 max_key.type = key_type; 2379 max_key.type = key_type;
2377 2380
2378 min_key.objectid = inode->i_ino; 2381 min_key.objectid = ino;
2379 min_key.type = key_type; 2382 min_key.type = key_type;
2380 min_key.offset = min_offset; 2383 min_key.offset = min_offset;
2381 2384
@@ -2388,9 +2391,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2388 * we didn't find anything from this transaction, see if there 2391 * we didn't find anything from this transaction, see if there
2389 * is anything at all 2392 * is anything at all
2390 */ 2393 */
2391 if (ret != 0 || min_key.objectid != inode->i_ino || 2394 if (ret != 0 || min_key.objectid != ino || min_key.type != key_type) {
2392 min_key.type != key_type) { 2395 min_key.objectid = ino;
2393 min_key.objectid = inode->i_ino;
2394 min_key.type = key_type; 2396 min_key.type = key_type;
2395 min_key.offset = (u64)-1; 2397 min_key.offset = (u64)-1;
2396 btrfs_release_path(path); 2398 btrfs_release_path(path);
@@ -2399,7 +2401,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2399 btrfs_release_path(path); 2401 btrfs_release_path(path);
2400 return ret; 2402 return ret;
2401 } 2403 }
2402 ret = btrfs_previous_item(root, path, inode->i_ino, key_type); 2404 ret = btrfs_previous_item(root, path, ino, key_type);
2403 2405
2404 /* if ret == 0 there are items for this type, 2406 /* if ret == 0 there are items for this type,
2405 * create a range to tell us the last key of this type. 2407 * create a range to tell us the last key of this type.
@@ -2417,7 +2419,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2417 } 2419 }
2418 2420
2419 /* go backward to find any previous key */ 2421 /* go backward to find any previous key */
2420 ret = btrfs_previous_item(root, path, inode->i_ino, key_type); 2422 ret = btrfs_previous_item(root, path, ino, key_type);
2421 if (ret == 0) { 2423 if (ret == 0) {
2422 struct btrfs_key tmp; 2424 struct btrfs_key tmp;
2423 btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); 2425 btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
@@ -2452,8 +2454,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2452 for (i = path->slots[0]; i < nritems; i++) { 2454 for (i = path->slots[0]; i < nritems; i++) {
2453 btrfs_item_key_to_cpu(src, &min_key, i); 2455 btrfs_item_key_to_cpu(src, &min_key, i);
2454 2456
2455 if (min_key.objectid != inode->i_ino || 2457 if (min_key.objectid != ino || min_key.type != key_type)
2456 min_key.type != key_type)
2457 goto done; 2458 goto done;
2458 ret = overwrite_item(trans, log, dst_path, src, i, 2459 ret = overwrite_item(trans, log, dst_path, src, i,
2459 &min_key); 2460 &min_key);
@@ -2474,7 +2475,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2474 goto done; 2475 goto done;
2475 } 2476 }
2476 btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); 2477 btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
2477 if (tmp.objectid != inode->i_ino || tmp.type != key_type) { 2478 if (tmp.objectid != ino || tmp.type != key_type) {
2478 last_offset = (u64)-1; 2479 last_offset = (u64)-1;
2479 goto done; 2480 goto done;
2480 } 2481 }
@@ -2500,8 +2501,7 @@ done:
2500 * is valid 2501 * is valid
2501 */ 2502 */
2502 ret = insert_dir_log_key(trans, log, path, key_type, 2503 ret = insert_dir_log_key(trans, log, path, key_type,
2503 inode->i_ino, first_offset, 2504 ino, first_offset, last_offset);
2504 last_offset);
2505 if (ret) 2505 if (ret)
2506 err = ret; 2506 err = ret;
2507 } 2507 }
@@ -2745,6 +2745,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2745 int nritems; 2745 int nritems;
2746 int ins_start_slot = 0; 2746 int ins_start_slot = 0;
2747 int ins_nr; 2747 int ins_nr;
2748 u64 ino = btrfs_ino(inode);
2748 2749
2749 log = root->log_root; 2750 log = root->log_root;
2750 2751
@@ -2757,11 +2758,11 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2757 return -ENOMEM; 2758 return -ENOMEM;
2758 } 2759 }
2759 2760
2760 min_key.objectid = inode->i_ino; 2761 min_key.objectid = ino;
2761 min_key.type = BTRFS_INODE_ITEM_KEY; 2762 min_key.type = BTRFS_INODE_ITEM_KEY;
2762 min_key.offset = 0; 2763 min_key.offset = 0;
2763 2764
2764 max_key.objectid = inode->i_ino; 2765 max_key.objectid = ino;
2765 2766
2766 /* today the code can only do partial logging of directories */ 2767 /* today the code can only do partial logging of directories */
2767 if (!S_ISDIR(inode->i_mode)) 2768 if (!S_ISDIR(inode->i_mode))
@@ -2773,6 +2774,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2773 max_key.type = (u8)-1; 2774 max_key.type = (u8)-1;
2774 max_key.offset = (u64)-1; 2775 max_key.offset = (u64)-1;
2775 2776
2777 ret = btrfs_commit_inode_delayed_items(trans, inode);
2778 if (ret) {
2779 btrfs_free_path(path);
2780 btrfs_free_path(dst_path);
2781 return ret;
2782 }
2783
2776 mutex_lock(&BTRFS_I(inode)->log_mutex); 2784 mutex_lock(&BTRFS_I(inode)->log_mutex);
2777 2785
2778 /* 2786 /*
@@ -2784,8 +2792,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2784 2792
2785 if (inode_only == LOG_INODE_EXISTS) 2793 if (inode_only == LOG_INODE_EXISTS)
2786 max_key_type = BTRFS_XATTR_ITEM_KEY; 2794 max_key_type = BTRFS_XATTR_ITEM_KEY;
2787 ret = drop_objectid_items(trans, log, path, 2795 ret = drop_objectid_items(trans, log, path, ino, max_key_type);
2788 inode->i_ino, max_key_type);
2789 } else { 2796 } else {
2790 ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); 2797 ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0);
2791 } 2798 }
@@ -2803,7 +2810,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2803 break; 2810 break;
2804again: 2811again:
2805 /* note, ins_nr might be > 0 here, cleanup outside the loop */ 2812 /* note, ins_nr might be > 0 here, cleanup outside the loop */
2806 if (min_key.objectid != inode->i_ino) 2813 if (min_key.objectid != ino)
2807 break; 2814 break;
2808 if (min_key.type > max_key.type) 2815 if (min_key.type > max_key.type)
2809 break; 2816 break;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 4ca88d1e18e..f3107e4b4d5 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -44,7 +44,7 @@ ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
44 return -ENOMEM; 44 return -ENOMEM;
45 45
46 /* lookup the xattr by name */ 46 /* lookup the xattr by name */
47 di = btrfs_lookup_xattr(NULL, root, path, inode->i_ino, name, 47 di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), name,
48 strlen(name), 0); 48 strlen(name), 0);
49 if (!di) { 49 if (!di) {
50 ret = -ENODATA; 50 ret = -ENODATA;
@@ -103,7 +103,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
103 return -ENOMEM; 103 return -ENOMEM;
104 104
105 /* first lets see if we already have this xattr */ 105 /* first lets see if we already have this xattr */
106 di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name, 106 di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
107 strlen(name), -1); 107 strlen(name), -1);
108 if (IS_ERR(di)) { 108 if (IS_ERR(di)) {
109 ret = PTR_ERR(di); 109 ret = PTR_ERR(di);
@@ -136,7 +136,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
136 } 136 }
137 137
138 /* ok we have to create a completely new xattr */ 138 /* ok we have to create a completely new xattr */
139 ret = btrfs_insert_xattr_item(trans, root, path, inode->i_ino, 139 ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
140 name, name_len, value, size); 140 name, name_len, value, size);
141 BUG_ON(ret); 141 BUG_ON(ret);
142out: 142out:
@@ -190,7 +190,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
190 * NOTE: we set key.offset = 0; because we want to start with the 190 * NOTE: we set key.offset = 0; because we want to start with the
191 * first xattr that we find and walk forward 191 * first xattr that we find and walk forward
192 */ 192 */
193 key.objectid = inode->i_ino; 193 key.objectid = btrfs_ino(inode);
194 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); 194 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
195 key.offset = 0; 195 key.offset = 0;
196 196