diff options
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r-- | fs/btrfs/file.c | 257 |
1 files changed, 257 insertions, 0 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 58ddc4442159..c6a22d783c35 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -40,6 +40,263 @@ | |||
40 | #include "locking.h" | 40 | #include "locking.h" |
41 | #include "compat.h" | 41 | #include "compat.h" |
42 | 42 | ||
43 | /* | ||
44 | * when auto defrag is enabled we | ||
45 | * queue up these defrag structs to remember which | ||
46 | * inodes need defragging passes | ||
47 | */ | ||
48 | struct inode_defrag { | ||
49 | struct rb_node rb_node; | ||
50 | /* objectid */ | ||
51 | u64 ino; | ||
52 | /* | ||
53 | * transid where the defrag was added, we search for | ||
54 | * extents newer than this | ||
55 | */ | ||
56 | u64 transid; | ||
57 | |||
58 | /* root objectid */ | ||
59 | u64 root; | ||
60 | |||
61 | /* last offset we were able to defrag */ | ||
62 | u64 last_offset; | ||
63 | |||
64 | /* if we've wrapped around back to zero once already */ | ||
65 | int cycled; | ||
66 | }; | ||
67 | |||
68 | /* pop a record for an inode into the defrag tree. The lock | ||
69 | * must be held already | ||
70 | * | ||
71 | * If you're inserting a record for an older transid than an | ||
72 | * existing record, the transid already in the tree is lowered | ||
73 | * | ||
74 | * If an existing record is found the defrag item you | ||
75 | * pass in is freed | ||
76 | */ | ||
77 | static int __btrfs_add_inode_defrag(struct inode *inode, | ||
78 | struct inode_defrag *defrag) | ||
79 | { | ||
80 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
81 | struct inode_defrag *entry; | ||
82 | struct rb_node **p; | ||
83 | struct rb_node *parent = NULL; | ||
84 | |||
85 | p = &root->fs_info->defrag_inodes.rb_node; | ||
86 | while (*p) { | ||
87 | parent = *p; | ||
88 | entry = rb_entry(parent, struct inode_defrag, rb_node); | ||
89 | |||
90 | if (defrag->ino < entry->ino) | ||
91 | p = &parent->rb_left; | ||
92 | else if (defrag->ino > entry->ino) | ||
93 | p = &parent->rb_right; | ||
94 | else { | ||
95 | /* if we're reinserting an entry for | ||
96 | * an old defrag run, make sure to | ||
97 | * lower the transid of our existing record | ||
98 | */ | ||
99 | if (defrag->transid < entry->transid) | ||
100 | entry->transid = defrag->transid; | ||
101 | if (defrag->last_offset > entry->last_offset) | ||
102 | entry->last_offset = defrag->last_offset; | ||
103 | goto exists; | ||
104 | } | ||
105 | } | ||
106 | BTRFS_I(inode)->in_defrag = 1; | ||
107 | rb_link_node(&defrag->rb_node, parent, p); | ||
108 | rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); | ||
109 | return 0; | ||
110 | |||
111 | exists: | ||
112 | kfree(defrag); | ||
113 | return 0; | ||
114 | |||
115 | } | ||
116 | |||
117 | /* | ||
118 | * insert a defrag record for this inode if auto defrag is | ||
119 | * enabled | ||
120 | */ | ||
121 | int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | ||
122 | struct inode *inode) | ||
123 | { | ||
124 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
125 | struct inode_defrag *defrag; | ||
126 | int ret = 0; | ||
127 | u64 transid; | ||
128 | |||
129 | if (!btrfs_test_opt(root, AUTO_DEFRAG)) | ||
130 | return 0; | ||
131 | |||
132 | if (root->fs_info->closing) | ||
133 | return 0; | ||
134 | |||
135 | if (BTRFS_I(inode)->in_defrag) | ||
136 | return 0; | ||
137 | |||
138 | if (trans) | ||
139 | transid = trans->transid; | ||
140 | else | ||
141 | transid = BTRFS_I(inode)->root->last_trans; | ||
142 | |||
143 | defrag = kzalloc(sizeof(*defrag), GFP_NOFS); | ||
144 | if (!defrag) | ||
145 | return -ENOMEM; | ||
146 | |||
147 | defrag->ino = inode->i_ino; | ||
148 | defrag->transid = transid; | ||
149 | defrag->root = root->root_key.objectid; | ||
150 | |||
151 | spin_lock(&root->fs_info->defrag_inodes_lock); | ||
152 | if (!BTRFS_I(inode)->in_defrag) | ||
153 | ret = __btrfs_add_inode_defrag(inode, defrag); | ||
154 | spin_unlock(&root->fs_info->defrag_inodes_lock); | ||
155 | return ret; | ||
156 | } | ||
157 | |||
158 | /* | ||
159 | * must be called with the defrag_inodes lock held | ||
160 | */ | ||
161 | struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, u64 ino, | ||
162 | struct rb_node **next) | ||
163 | { | ||
164 | struct inode_defrag *entry = NULL; | ||
165 | struct rb_node *p; | ||
166 | struct rb_node *parent = NULL; | ||
167 | |||
168 | p = info->defrag_inodes.rb_node; | ||
169 | while (p) { | ||
170 | parent = p; | ||
171 | entry = rb_entry(parent, struct inode_defrag, rb_node); | ||
172 | |||
173 | if (ino < entry->ino) | ||
174 | p = parent->rb_left; | ||
175 | else if (ino > entry->ino) | ||
176 | p = parent->rb_right; | ||
177 | else | ||
178 | return entry; | ||
179 | } | ||
180 | |||
181 | if (next) { | ||
182 | while (parent && ino > entry->ino) { | ||
183 | parent = rb_next(parent); | ||
184 | entry = rb_entry(parent, struct inode_defrag, rb_node); | ||
185 | } | ||
186 | *next = parent; | ||
187 | } | ||
188 | return NULL; | ||
189 | } | ||
190 | |||
191 | /* | ||
192 | * run through the list of inodes in the FS that need | ||
193 | * defragging | ||
194 | */ | ||
195 | int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) | ||
196 | { | ||
197 | struct inode_defrag *defrag; | ||
198 | struct btrfs_root *inode_root; | ||
199 | struct inode *inode; | ||
200 | struct rb_node *n; | ||
201 | struct btrfs_key key; | ||
202 | struct btrfs_ioctl_defrag_range_args range; | ||
203 | u64 first_ino = 0; | ||
204 | int num_defrag; | ||
205 | int defrag_batch = 1024; | ||
206 | |||
207 | memset(&range, 0, sizeof(range)); | ||
208 | range.len = (u64)-1; | ||
209 | |||
210 | atomic_inc(&fs_info->defrag_running); | ||
211 | spin_lock(&fs_info->defrag_inodes_lock); | ||
212 | while(1) { | ||
213 | n = NULL; | ||
214 | |||
215 | /* find an inode to defrag */ | ||
216 | defrag = btrfs_find_defrag_inode(fs_info, first_ino, &n); | ||
217 | if (!defrag) { | ||
218 | if (n) | ||
219 | defrag = rb_entry(n, struct inode_defrag, rb_node); | ||
220 | else if (first_ino) { | ||
221 | first_ino = 0; | ||
222 | continue; | ||
223 | } else { | ||
224 | break; | ||
225 | } | ||
226 | } | ||
227 | |||
228 | /* remove it from the rbtree */ | ||
229 | first_ino = defrag->ino + 1; | ||
230 | rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); | ||
231 | |||
232 | if (fs_info->closing) | ||
233 | goto next_free; | ||
234 | |||
235 | spin_unlock(&fs_info->defrag_inodes_lock); | ||
236 | |||
237 | /* get the inode */ | ||
238 | key.objectid = defrag->root; | ||
239 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | ||
240 | key.offset = (u64)-1; | ||
241 | inode_root = btrfs_read_fs_root_no_name(fs_info, &key); | ||
242 | if (IS_ERR(inode_root)) | ||
243 | goto next; | ||
244 | |||
245 | key.objectid = defrag->ino; | ||
246 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | ||
247 | key.offset = 0; | ||
248 | |||
249 | inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); | ||
250 | if (IS_ERR(inode)) | ||
251 | goto next; | ||
252 | |||
253 | /* do a chunk of defrag */ | ||
254 | BTRFS_I(inode)->in_defrag = 0; | ||
255 | range.start = defrag->last_offset; | ||
256 | num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid, | ||
257 | defrag_batch); | ||
258 | /* | ||
259 | * if we filled the whole defrag batch, there | ||
260 | * must be more work to do. Queue this defrag | ||
261 | * again | ||
262 | */ | ||
263 | if (num_defrag == defrag_batch) { | ||
264 | defrag->last_offset = range.start; | ||
265 | __btrfs_add_inode_defrag(inode, defrag); | ||
266 | /* | ||
267 | * we don't want to kfree defrag, we added it back to | ||
268 | * the rbtree | ||
269 | */ | ||
270 | defrag = NULL; | ||
271 | } else if (defrag->last_offset && !defrag->cycled) { | ||
272 | /* | ||
273 | * we didn't fill our defrag batch, but | ||
274 | * we didn't start at zero. Make sure we loop | ||
275 | * around to the start of the file. | ||
276 | */ | ||
277 | defrag->last_offset = 0; | ||
278 | defrag->cycled = 1; | ||
279 | __btrfs_add_inode_defrag(inode, defrag); | ||
280 | defrag = NULL; | ||
281 | } | ||
282 | |||
283 | iput(inode); | ||
284 | next: | ||
285 | spin_lock(&fs_info->defrag_inodes_lock); | ||
286 | next_free: | ||
287 | kfree(defrag); | ||
288 | } | ||
289 | spin_unlock(&fs_info->defrag_inodes_lock); | ||
290 | |||
291 | atomic_dec(&fs_info->defrag_running); | ||
292 | |||
293 | /* | ||
294 | * during unmount, we use the transaction_wait queue to | ||
295 | * wait for the defragger to stop | ||
296 | */ | ||
297 | wake_up(&fs_info->transaction_wait); | ||
298 | return 0; | ||
299 | } | ||
43 | 300 | ||
44 | /* simple helper to fault in pages and copy. This should go away | 301 | /* simple helper to fault in pages and copy. This should go away |
45 | * and be replaced with calls into generic code. | 302 | * and be replaced with calls into generic code. |