diff options
author | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
commit | c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch) | |
tree | ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /fs/btrfs/inode-map.c | |
parent | ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff) | |
parent | 6a00f206debf8a5c8899055726ad127dbeeed098 (diff) |
Merge branch 'mpi-master' into wip-k-fmlpwip-k-fmlp
Conflicts:
litmus/sched_cedf.c
Diffstat (limited to 'fs/btrfs/inode-map.c')
-rw-r--r-- | fs/btrfs/inode-map.c | 477 |
1 files changed, 471 insertions, 6 deletions
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index c56eb5909172..b4087e0fa871 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
@@ -16,11 +16,476 @@ | |||
16 | * Boston, MA 021110-1307, USA. | 16 | * Boston, MA 021110-1307, USA. |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/delay.h> | ||
20 | #include <linux/kthread.h> | ||
21 | #include <linux/pagemap.h> | ||
22 | |||
19 | #include "ctree.h" | 23 | #include "ctree.h" |
20 | #include "disk-io.h" | 24 | #include "disk-io.h" |
25 | #include "free-space-cache.h" | ||
26 | #include "inode-map.h" | ||
21 | #include "transaction.h" | 27 | #include "transaction.h" |
22 | 28 | ||
23 | int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) | 29 | static int caching_kthread(void *data) |
30 | { | ||
31 | struct btrfs_root *root = data; | ||
32 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
33 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; | ||
34 | struct btrfs_key key; | ||
35 | struct btrfs_path *path; | ||
36 | struct extent_buffer *leaf; | ||
37 | u64 last = (u64)-1; | ||
38 | int slot; | ||
39 | int ret; | ||
40 | |||
41 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
42 | return 0; | ||
43 | |||
44 | path = btrfs_alloc_path(); | ||
45 | if (!path) | ||
46 | return -ENOMEM; | ||
47 | |||
48 | /* Since the commit root is read-only, we can safely skip locking. */ | ||
49 | path->skip_locking = 1; | ||
50 | path->search_commit_root = 1; | ||
51 | path->reada = 2; | ||
52 | |||
53 | key.objectid = BTRFS_FIRST_FREE_OBJECTID; | ||
54 | key.offset = 0; | ||
55 | key.type = BTRFS_INODE_ITEM_KEY; | ||
56 | again: | ||
57 | /* need to make sure the commit_root doesn't disappear */ | ||
58 | mutex_lock(&root->fs_commit_mutex); | ||
59 | |||
60 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
61 | if (ret < 0) | ||
62 | goto out; | ||
63 | |||
64 | while (1) { | ||
65 | if (btrfs_fs_closing(fs_info)) | ||
66 | goto out; | ||
67 | |||
68 | leaf = path->nodes[0]; | ||
69 | slot = path->slots[0]; | ||
70 | if (slot >= btrfs_header_nritems(leaf)) { | ||
71 | ret = btrfs_next_leaf(root, path); | ||
72 | if (ret < 0) | ||
73 | goto out; | ||
74 | else if (ret > 0) | ||
75 | break; | ||
76 | |||
77 | if (need_resched() || | ||
78 | btrfs_transaction_in_commit(fs_info)) { | ||
79 | leaf = path->nodes[0]; | ||
80 | |||
81 | if (btrfs_header_nritems(leaf) == 0) { | ||
82 | WARN_ON(1); | ||
83 | break; | ||
84 | } | ||
85 | |||
86 | /* | ||
87 | * Save the key so we can advances forward | ||
88 | * in the next search. | ||
89 | */ | ||
90 | btrfs_item_key_to_cpu(leaf, &key, 0); | ||
91 | btrfs_release_path(path); | ||
92 | root->cache_progress = last; | ||
93 | mutex_unlock(&root->fs_commit_mutex); | ||
94 | schedule_timeout(1); | ||
95 | goto again; | ||
96 | } else | ||
97 | continue; | ||
98 | } | ||
99 | |||
100 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
101 | |||
102 | if (key.type != BTRFS_INODE_ITEM_KEY) | ||
103 | goto next; | ||
104 | |||
105 | if (key.objectid >= root->highest_objectid) | ||
106 | break; | ||
107 | |||
108 | if (last != (u64)-1 && last + 1 != key.objectid) { | ||
109 | __btrfs_add_free_space(ctl, last + 1, | ||
110 | key.objectid - last - 1); | ||
111 | wake_up(&root->cache_wait); | ||
112 | } | ||
113 | |||
114 | last = key.objectid; | ||
115 | next: | ||
116 | path->slots[0]++; | ||
117 | } | ||
118 | |||
119 | if (last < root->highest_objectid - 1) { | ||
120 | __btrfs_add_free_space(ctl, last + 1, | ||
121 | root->highest_objectid - last - 1); | ||
122 | } | ||
123 | |||
124 | spin_lock(&root->cache_lock); | ||
125 | root->cached = BTRFS_CACHE_FINISHED; | ||
126 | spin_unlock(&root->cache_lock); | ||
127 | |||
128 | root->cache_progress = (u64)-1; | ||
129 | btrfs_unpin_free_ino(root); | ||
130 | out: | ||
131 | wake_up(&root->cache_wait); | ||
132 | mutex_unlock(&root->fs_commit_mutex); | ||
133 | |||
134 | btrfs_free_path(path); | ||
135 | |||
136 | return ret; | ||
137 | } | ||
138 | |||
139 | static void start_caching(struct btrfs_root *root) | ||
140 | { | ||
141 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; | ||
142 | struct task_struct *tsk; | ||
143 | int ret; | ||
144 | u64 objectid; | ||
145 | |||
146 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
147 | return; | ||
148 | |||
149 | spin_lock(&root->cache_lock); | ||
150 | if (root->cached != BTRFS_CACHE_NO) { | ||
151 | spin_unlock(&root->cache_lock); | ||
152 | return; | ||
153 | } | ||
154 | |||
155 | root->cached = BTRFS_CACHE_STARTED; | ||
156 | spin_unlock(&root->cache_lock); | ||
157 | |||
158 | ret = load_free_ino_cache(root->fs_info, root); | ||
159 | if (ret == 1) { | ||
160 | spin_lock(&root->cache_lock); | ||
161 | root->cached = BTRFS_CACHE_FINISHED; | ||
162 | spin_unlock(&root->cache_lock); | ||
163 | return; | ||
164 | } | ||
165 | |||
166 | /* | ||
167 | * It can be quite time-consuming to fill the cache by searching | ||
168 | * through the extent tree, and this can keep ino allocation path | ||
169 | * waiting. Therefore at start we quickly find out the highest | ||
170 | * inode number and we know we can use inode numbers which fall in | ||
171 | * [highest_ino + 1, BTRFS_LAST_FREE_OBJECTID]. | ||
172 | */ | ||
173 | ret = btrfs_find_free_objectid(root, &objectid); | ||
174 | if (!ret && objectid <= BTRFS_LAST_FREE_OBJECTID) { | ||
175 | __btrfs_add_free_space(ctl, objectid, | ||
176 | BTRFS_LAST_FREE_OBJECTID - objectid + 1); | ||
177 | } | ||
178 | |||
179 | tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n", | ||
180 | root->root_key.objectid); | ||
181 | BUG_ON(IS_ERR(tsk)); | ||
182 | } | ||
183 | |||
184 | int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid) | ||
185 | { | ||
186 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
187 | return btrfs_find_free_objectid(root, objectid); | ||
188 | |||
189 | again: | ||
190 | *objectid = btrfs_find_ino_for_alloc(root); | ||
191 | |||
192 | if (*objectid != 0) | ||
193 | return 0; | ||
194 | |||
195 | start_caching(root); | ||
196 | |||
197 | wait_event(root->cache_wait, | ||
198 | root->cached == BTRFS_CACHE_FINISHED || | ||
199 | root->free_ino_ctl->free_space > 0); | ||
200 | |||
201 | if (root->cached == BTRFS_CACHE_FINISHED && | ||
202 | root->free_ino_ctl->free_space == 0) | ||
203 | return -ENOSPC; | ||
204 | else | ||
205 | goto again; | ||
206 | } | ||
207 | |||
208 | void btrfs_return_ino(struct btrfs_root *root, u64 objectid) | ||
209 | { | ||
210 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; | ||
211 | struct btrfs_free_space_ctl *pinned = root->free_ino_pinned; | ||
212 | |||
213 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
214 | return; | ||
215 | |||
216 | again: | ||
217 | if (root->cached == BTRFS_CACHE_FINISHED) { | ||
218 | __btrfs_add_free_space(ctl, objectid, 1); | ||
219 | } else { | ||
220 | /* | ||
221 | * If we are in the process of caching free ino chunks, | ||
222 | * to avoid adding the same inode number to the free_ino | ||
223 | * tree twice due to cross transaction, we'll leave it | ||
224 | * in the pinned tree until a transaction is committed | ||
225 | * or the caching work is done. | ||
226 | */ | ||
227 | |||
228 | mutex_lock(&root->fs_commit_mutex); | ||
229 | spin_lock(&root->cache_lock); | ||
230 | if (root->cached == BTRFS_CACHE_FINISHED) { | ||
231 | spin_unlock(&root->cache_lock); | ||
232 | mutex_unlock(&root->fs_commit_mutex); | ||
233 | goto again; | ||
234 | } | ||
235 | spin_unlock(&root->cache_lock); | ||
236 | |||
237 | start_caching(root); | ||
238 | |||
239 | if (objectid <= root->cache_progress || | ||
240 | objectid > root->highest_objectid) | ||
241 | __btrfs_add_free_space(ctl, objectid, 1); | ||
242 | else | ||
243 | __btrfs_add_free_space(pinned, objectid, 1); | ||
244 | |||
245 | mutex_unlock(&root->fs_commit_mutex); | ||
246 | } | ||
247 | } | ||
248 | |||
249 | /* | ||
250 | * When a transaction is committed, we'll move those inode numbers which | ||
251 | * are smaller than root->cache_progress from pinned tree to free_ino tree, | ||
252 | * and others will just be dropped, because the commit root we were | ||
253 | * searching has changed. | ||
254 | * | ||
255 | * Must be called with root->fs_commit_mutex held | ||
256 | */ | ||
257 | void btrfs_unpin_free_ino(struct btrfs_root *root) | ||
258 | { | ||
259 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; | ||
260 | struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset; | ||
261 | struct btrfs_free_space *info; | ||
262 | struct rb_node *n; | ||
263 | u64 count; | ||
264 | |||
265 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
266 | return; | ||
267 | |||
268 | while (1) { | ||
269 | n = rb_first(rbroot); | ||
270 | if (!n) | ||
271 | break; | ||
272 | |||
273 | info = rb_entry(n, struct btrfs_free_space, offset_index); | ||
274 | BUG_ON(info->bitmap); | ||
275 | |||
276 | if (info->offset > root->cache_progress) | ||
277 | goto free; | ||
278 | else if (info->offset + info->bytes > root->cache_progress) | ||
279 | count = root->cache_progress - info->offset + 1; | ||
280 | else | ||
281 | count = info->bytes; | ||
282 | |||
283 | __btrfs_add_free_space(ctl, info->offset, count); | ||
284 | free: | ||
285 | rb_erase(&info->offset_index, rbroot); | ||
286 | kfree(info); | ||
287 | } | ||
288 | } | ||
289 | |||
290 | #define INIT_THRESHOLD (((1024 * 32) / 2) / sizeof(struct btrfs_free_space)) | ||
291 | #define INODES_PER_BITMAP (PAGE_CACHE_SIZE * 8) | ||
292 | |||
293 | /* | ||
294 | * The goal is to keep the memory used by the free_ino tree won't | ||
295 | * exceed the memory if we use bitmaps only. | ||
296 | */ | ||
297 | static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) | ||
298 | { | ||
299 | struct btrfs_free_space *info; | ||
300 | struct rb_node *n; | ||
301 | int max_ino; | ||
302 | int max_bitmaps; | ||
303 | |||
304 | n = rb_last(&ctl->free_space_offset); | ||
305 | if (!n) { | ||
306 | ctl->extents_thresh = INIT_THRESHOLD; | ||
307 | return; | ||
308 | } | ||
309 | info = rb_entry(n, struct btrfs_free_space, offset_index); | ||
310 | |||
311 | /* | ||
312 | * Find the maximum inode number in the filesystem. Note we | ||
313 | * ignore the fact that this can be a bitmap, because we are | ||
314 | * not doing precise calculation. | ||
315 | */ | ||
316 | max_ino = info->bytes - 1; | ||
317 | |||
318 | max_bitmaps = ALIGN(max_ino, INODES_PER_BITMAP) / INODES_PER_BITMAP; | ||
319 | if (max_bitmaps <= ctl->total_bitmaps) { | ||
320 | ctl->extents_thresh = 0; | ||
321 | return; | ||
322 | } | ||
323 | |||
324 | ctl->extents_thresh = (max_bitmaps - ctl->total_bitmaps) * | ||
325 | PAGE_CACHE_SIZE / sizeof(*info); | ||
326 | } | ||
327 | |||
328 | /* | ||
329 | * We don't fall back to bitmap, if we are below the extents threshold | ||
330 | * or this chunk of inode numbers is a big one. | ||
331 | */ | ||
332 | static bool use_bitmap(struct btrfs_free_space_ctl *ctl, | ||
333 | struct btrfs_free_space *info) | ||
334 | { | ||
335 | if (ctl->free_extents < ctl->extents_thresh || | ||
336 | info->bytes > INODES_PER_BITMAP / 10) | ||
337 | return false; | ||
338 | |||
339 | return true; | ||
340 | } | ||
341 | |||
342 | static struct btrfs_free_space_op free_ino_op = { | ||
343 | .recalc_thresholds = recalculate_thresholds, | ||
344 | .use_bitmap = use_bitmap, | ||
345 | }; | ||
346 | |||
347 | static void pinned_recalc_thresholds(struct btrfs_free_space_ctl *ctl) | ||
348 | { | ||
349 | } | ||
350 | |||
351 | static bool pinned_use_bitmap(struct btrfs_free_space_ctl *ctl, | ||
352 | struct btrfs_free_space *info) | ||
353 | { | ||
354 | /* | ||
355 | * We always use extents for two reasons: | ||
356 | * | ||
357 | * - The pinned tree is only used during the process of caching | ||
358 | * work. | ||
359 | * - Make code simpler. See btrfs_unpin_free_ino(). | ||
360 | */ | ||
361 | return false; | ||
362 | } | ||
363 | |||
364 | static struct btrfs_free_space_op pinned_free_ino_op = { | ||
365 | .recalc_thresholds = pinned_recalc_thresholds, | ||
366 | .use_bitmap = pinned_use_bitmap, | ||
367 | }; | ||
368 | |||
369 | void btrfs_init_free_ino_ctl(struct btrfs_root *root) | ||
370 | { | ||
371 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; | ||
372 | struct btrfs_free_space_ctl *pinned = root->free_ino_pinned; | ||
373 | |||
374 | spin_lock_init(&ctl->tree_lock); | ||
375 | ctl->unit = 1; | ||
376 | ctl->start = 0; | ||
377 | ctl->private = NULL; | ||
378 | ctl->op = &free_ino_op; | ||
379 | |||
380 | /* | ||
381 | * Initially we allow to use 16K of ram to cache chunks of | ||
382 | * inode numbers before we resort to bitmaps. This is somewhat | ||
383 | * arbitrary, but it will be adjusted in runtime. | ||
384 | */ | ||
385 | ctl->extents_thresh = INIT_THRESHOLD; | ||
386 | |||
387 | spin_lock_init(&pinned->tree_lock); | ||
388 | pinned->unit = 1; | ||
389 | pinned->start = 0; | ||
390 | pinned->private = NULL; | ||
391 | pinned->extents_thresh = 0; | ||
392 | pinned->op = &pinned_free_ino_op; | ||
393 | } | ||
394 | |||
395 | int btrfs_save_ino_cache(struct btrfs_root *root, | ||
396 | struct btrfs_trans_handle *trans) | ||
397 | { | ||
398 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; | ||
399 | struct btrfs_path *path; | ||
400 | struct inode *inode; | ||
401 | u64 alloc_hint = 0; | ||
402 | int ret; | ||
403 | int prealloc; | ||
404 | bool retry = false; | ||
405 | |||
406 | /* only fs tree and subvol/snap needs ino cache */ | ||
407 | if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID && | ||
408 | (root->root_key.objectid < BTRFS_FIRST_FREE_OBJECTID || | ||
409 | root->root_key.objectid > BTRFS_LAST_FREE_OBJECTID)) | ||
410 | return 0; | ||
411 | |||
412 | /* Don't save inode cache if we are deleting this root */ | ||
413 | if (btrfs_root_refs(&root->root_item) == 0 && | ||
414 | root != root->fs_info->tree_root) | ||
415 | return 0; | ||
416 | |||
417 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
418 | return 0; | ||
419 | |||
420 | path = btrfs_alloc_path(); | ||
421 | if (!path) | ||
422 | return -ENOMEM; | ||
423 | |||
424 | again: | ||
425 | inode = lookup_free_ino_inode(root, path); | ||
426 | if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { | ||
427 | ret = PTR_ERR(inode); | ||
428 | goto out; | ||
429 | } | ||
430 | |||
431 | if (IS_ERR(inode)) { | ||
432 | BUG_ON(retry); | ||
433 | retry = true; | ||
434 | |||
435 | ret = create_free_ino_inode(root, trans, path); | ||
436 | if (ret) | ||
437 | goto out; | ||
438 | goto again; | ||
439 | } | ||
440 | |||
441 | BTRFS_I(inode)->generation = 0; | ||
442 | ret = btrfs_update_inode(trans, root, inode); | ||
443 | WARN_ON(ret); | ||
444 | |||
445 | if (i_size_read(inode) > 0) { | ||
446 | ret = btrfs_truncate_free_space_cache(root, trans, path, inode); | ||
447 | if (ret) | ||
448 | goto out_put; | ||
449 | } | ||
450 | |||
451 | spin_lock(&root->cache_lock); | ||
452 | if (root->cached != BTRFS_CACHE_FINISHED) { | ||
453 | ret = -1; | ||
454 | spin_unlock(&root->cache_lock); | ||
455 | goto out_put; | ||
456 | } | ||
457 | spin_unlock(&root->cache_lock); | ||
458 | |||
459 | spin_lock(&ctl->tree_lock); | ||
460 | prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents; | ||
461 | prealloc = ALIGN(prealloc, PAGE_CACHE_SIZE); | ||
462 | prealloc += ctl->total_bitmaps * PAGE_CACHE_SIZE; | ||
463 | spin_unlock(&ctl->tree_lock); | ||
464 | |||
465 | /* Just to make sure we have enough space */ | ||
466 | prealloc += 8 * PAGE_CACHE_SIZE; | ||
467 | |||
468 | ret = btrfs_check_data_free_space(inode, prealloc); | ||
469 | if (ret) | ||
470 | goto out_put; | ||
471 | |||
472 | ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, | ||
473 | prealloc, prealloc, &alloc_hint); | ||
474 | if (ret) | ||
475 | goto out_put; | ||
476 | btrfs_free_reserved_data_space(inode, prealloc); | ||
477 | |||
478 | out_put: | ||
479 | iput(inode); | ||
480 | out: | ||
481 | if (ret == 0) | ||
482 | ret = btrfs_write_out_ino_cache(root, trans, path); | ||
483 | |||
484 | btrfs_free_path(path); | ||
485 | return ret; | ||
486 | } | ||
487 | |||
488 | static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid) | ||
24 | { | 489 | { |
25 | struct btrfs_path *path; | 490 | struct btrfs_path *path; |
26 | int ret; | 491 | int ret; |
@@ -30,7 +495,8 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) | |||
30 | int slot; | 495 | int slot; |
31 | 496 | ||
32 | path = btrfs_alloc_path(); | 497 | path = btrfs_alloc_path(); |
33 | BUG_ON(!path); | 498 | if (!path) |
499 | return -ENOMEM; | ||
34 | 500 | ||
35 | search_key.objectid = BTRFS_LAST_FREE_OBJECTID; | 501 | search_key.objectid = BTRFS_LAST_FREE_OBJECTID; |
36 | search_key.type = -1; | 502 | search_key.type = -1; |
@@ -54,15 +520,14 @@ error: | |||
54 | return ret; | 520 | return ret; |
55 | } | 521 | } |
56 | 522 | ||
57 | int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, | 523 | int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid) |
58 | struct btrfs_root *root, | ||
59 | u64 dirid, u64 *objectid) | ||
60 | { | 524 | { |
61 | int ret; | 525 | int ret; |
62 | mutex_lock(&root->objectid_mutex); | 526 | mutex_lock(&root->objectid_mutex); |
63 | 527 | ||
64 | if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) { | 528 | if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) { |
65 | ret = btrfs_find_highest_inode(root, &root->highest_objectid); | 529 | ret = btrfs_find_highest_objectid(root, |
530 | &root->highest_objectid); | ||
66 | if (ret) | 531 | if (ret) |
67 | goto out; | 532 | goto out; |
68 | } | 533 | } |